Diffstat (limited to 'arch/i386')
-rw-r--r--  arch/i386/Kconfig                           | 107
-rw-r--r--  arch/i386/Makefile                          |   7
-rw-r--r--  arch/i386/boot/Makefile                     |   6
-rw-r--r--  arch/i386/boot/compressed/head.S            |   7
-rw-r--r--  arch/i386/boot/compressed/misc.c            |   8
-rw-r--r--  arch/i386/boot/edd.S                        |   2
-rw-r--r--  arch/i386/boot/install.sh                   |   4
-rw-r--r--  arch/i386/boot/setup.S                      |   6
-rw-r--r--  arch/i386/boot/tools/build.c                |   8
-rw-r--r--  arch/i386/crypto/aes.c                      |   2
-rw-r--r--  arch/i386/defconfig                         |   1
-rw-r--r--  arch/i386/kernel/Makefile                   |   1
-rw-r--r--  arch/i386/kernel/acpi/boot.c                | 277
-rw-r--r--  arch/i386/kernel/acpi/sleep.c               |  27
-rw-r--r--  arch/i386/kernel/apic.c                     |  84
-rw-r--r--  arch/i386/kernel/apm.c                      |  13
-rw-r--r--  arch/i386/kernel/cpu/common.c               |  47
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/powernow-k7.c  |   4
-rw-r--r--  arch/i386/kernel/cpu/intel.c                |  12
-rw-r--r--  arch/i386/kernel/cpu/intel_cacheinfo.c      |   4
-rw-r--r--  arch/i386/kernel/cpu/mcheck/k7.c            |   2
-rw-r--r--  arch/i386/kernel/cpu/mcheck/mce.c           |   4
-rw-r--r--  arch/i386/kernel/cpu/mcheck/p4.c            |   4
-rw-r--r--  arch/i386/kernel/cpu/mcheck/p5.c            |   2
-rw-r--r--  arch/i386/kernel/cpu/mcheck/p6.c            |   2
-rw-r--r--  arch/i386/kernel/cpu/mcheck/winchip.c       |   2
-rw-r--r--  arch/i386/kernel/cpu/mtrr/generic.c         |   3
-rw-r--r--  arch/i386/kernel/cpu/mtrr/main.c            |  23
-rw-r--r--  arch/i386/kernel/cpu/proc.c                 |   2
-rw-r--r--  arch/i386/kernel/cpuid.c                    |  22
-rw-r--r--  arch/i386/kernel/crash.c                    | 223
-rw-r--r--  arch/i386/kernel/dmi_scan.c                 | 391
-rw-r--r--  arch/i386/kernel/efi.c                      |   4
-rw-r--r--  arch/i386/kernel/head.S                     |   6
-rw-r--r--  arch/i386/kernel/i386_ksyms.c               | 160
-rw-r--r--  arch/i386/kernel/i387.c                     |   3
-rw-r--r--  arch/i386/kernel/i8259.c                    |  12
-rw-r--r--  arch/i386/kernel/io_apic.c                  |  58
-rw-r--r--  arch/i386/kernel/irq.c                      |  72
-rw-r--r--  arch/i386/kernel/kprobes.c                  | 181
-rw-r--r--  arch/i386/kernel/machine_kexec.c            | 226
-rw-r--r--  arch/i386/kernel/mpparse.c                  |  33
-rw-r--r--  arch/i386/kernel/msr.c                      |  22
-rw-r--r--  arch/i386/kernel/nmi.c                      |  24
-rw-r--r--  arch/i386/kernel/pci-dma.c                  |   3
-rw-r--r--  arch/i386/kernel/process.c                  | 104
-rw-r--r--  arch/i386/kernel/ptrace.c                   |   2
-rw-r--r--  arch/i386/kernel/reboot.c                   |  87
-rw-r--r--  arch/i386/kernel/relocate_kernel.S          | 120
-rw-r--r--  arch/i386/kernel/setup.c                    |  97
-rw-r--r--  arch/i386/kernel/signal.c                   |  35
-rw-r--r--  arch/i386/kernel/smp.c                      |  37
-rw-r--r--  arch/i386/kernel/smpboot.c                  | 357
-rw-r--r--  arch/i386/kernel/syscall_table.S            |   6
-rw-r--r--  arch/i386/kernel/sysenter.c                 |  12
-rw-r--r--  arch/i386/kernel/time.c                     |  11
-rw-r--r--  arch/i386/kernel/time_hpet.c                |   2
-rw-r--r--  arch/i386/kernel/timers/common.c            |  14
-rw-r--r--  arch/i386/kernel/timers/timer.c             |   9
-rw-r--r--  arch/i386/kernel/timers/timer_cyclone.c     |   4
-rw-r--r--  arch/i386/kernel/timers/timer_hpet.c        |   3
-rw-r--r--  arch/i386/kernel/timers/timer_pit.c         |   4
-rw-r--r--  arch/i386/kernel/timers/timer_pm.c          |   1
-rw-r--r--  arch/i386/kernel/timers/timer_tsc.c         |  13
-rw-r--r--  arch/i386/kernel/traps.c                    |  74
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S              |  59
-rw-r--r--  arch/i386/lib/dec_and_lock.c                |   2
-rw-r--r--  arch/i386/lib/delay.c                       |   8
-rw-r--r--  arch/i386/lib/mmx.c                         |   5
-rw-r--r--  arch/i386/lib/usercopy.c                    |   8
-rw-r--r--  arch/i386/mach-default/setup.c              |  27
-rw-r--r--  arch/i386/mach-default/topology.c           |  15
-rw-r--r--  arch/i386/mach-visws/mpparse.c              |   5
-rw-r--r--  arch/i386/mach-voyager/voyager_basic.c      |   2
-rw-r--r--  arch/i386/mach-voyager/voyager_smp.c        |   2
-rw-r--r--  arch/i386/mm/Makefile                       |   2
-rw-r--r--  arch/i386/mm/discontig.c                    | 129
-rw-r--r--  arch/i386/mm/fault.c                        |   7
-rw-r--r--  arch/i386/mm/highmem.c                      |  24
-rw-r--r--  arch/i386/mm/hugetlbpage.c                  | 204
-rw-r--r--  arch/i386/mm/init.c                         |  24
-rw-r--r--  arch/i386/mm/ioremap.c                      |   7
-rw-r--r--  arch/i386/mm/pgtable.c                      |  30
-rw-r--r--  arch/i386/oprofile/backtrace.c              |   2
-rw-r--r--  arch/i386/pci/common.c                      |   8
-rw-r--r--  arch/i386/pci/irq.c                         |  73
-rw-r--r--  arch/i386/pci/legacy.c                      |   2
-rw-r--r--  arch/i386/pci/mmconfig.c                    |  39
-rw-r--r--  arch/i386/pci/numa.c                        |   2
-rw-r--r--  arch/i386/pci/pcbios.c                      |   4
-rw-r--r--  arch/i386/pci/pci.h                         |   1
-rw-r--r--  arch/i386/power/cpu.c                       |  37
92 files changed, 2601 insertions(+), 1230 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index dfd904f6883b..6c02336fe2e4 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -68,7 +68,6 @@ config X86_VOYAGER
 
 config X86_NUMAQ
         bool "NUMAQ (IBM/Sequent)"
-        select DISCONTIGMEM
         select NUMA
         help
           This option is used for getting Linux to run on a (IBM/Sequent) NUMA
@@ -511,28 +510,7 @@ config SCHED_SMT
           cost of slightly increased overhead in some places. If unsure say
           N here.
 
-config PREEMPT
-        bool "Preemptible Kernel"
-        help
-          This option reduces the latency of the kernel when reacting to
-          real-time or interactive events by allowing a low priority process to
-          be preempted even if it is in kernel mode executing a system call.
-          This allows applications to run more reliably even when the system is
-          under load.
-
-          Say Y here if you are building a kernel for a desktop, embedded
-          or real-time system.  Say N if you are unsure.
-
-config PREEMPT_BKL
-        bool "Preempt The Big Kernel Lock"
-        depends on PREEMPT
-        default y
-        help
-          This option reduces the latency of the kernel by making the
-          big kernel lock preemptible.
-
-          Say Y here if you are building a kernel for a desktop system.
-          Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
         bool "Local APIC support on uniprocessors"
@@ -783,25 +761,48 @@ comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
         depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
-config DISCONTIGMEM
-        bool
-        depends on NUMA
-        default y
-
 config HAVE_ARCH_BOOTMEM_NODE
         bool
         depends on NUMA
         default y
 
-config HAVE_MEMORY_PRESENT
+config ARCH_HAVE_MEMORY_PRESENT
         bool
         depends on DISCONTIGMEM
         default y
 
 config NEED_NODE_MEMMAP_SIZE
         bool
-        depends on DISCONTIGMEM
+        depends on DISCONTIGMEM || SPARSEMEM
+        default y
+
+config HAVE_ARCH_ALLOC_REMAP
+        bool
+        depends on NUMA
+        default y
+
+config ARCH_DISCONTIGMEM_ENABLE
+        def_bool y
+        depends on NUMA
+
+config ARCH_DISCONTIGMEM_DEFAULT
+        def_bool y
+        depends on NUMA
+
+config ARCH_SPARSEMEM_ENABLE
+        def_bool y
+        depends on NUMA
+
+config ARCH_SELECT_MEMORY_MODEL
+        def_bool y
+        depends on ARCH_SPARSEMEM_ENABLE
+
+source "mm/Kconfig"
+
+config HAVE_ARCH_EARLY_PFN_TO_NID
+        bool
         default y
+        depends on NUMA
 
 config HIGHPTE
         bool "Allocate 3rd-level pagetables from highmem"
@@ -939,6 +940,43 @@ config SECCOMP
 
           If unsure, say Y. Only embedded should say N here.
 
+source kernel/Kconfig.hz
+
+config PHYSICAL_START
+        hex "Physical address where the kernel is loaded" if EMBEDDED
+        default "0x100000"
+        help
+          This gives the physical address where the kernel is loaded.
+          Primarily used in the case of kexec on panic where the
+          fail-safe kernel needs to run at a different address than
+          the panicked kernel.
+
+          Don't change this unless you know what you are doing.
+
+config KEXEC
+        bool "kexec system call (EXPERIMENTAL)"
+        depends on EXPERIMENTAL
+        help
+          kexec is a system call that implements the ability to shutdown your
+          current kernel, and to start another kernel.  It is like a reboot
+          but it is independent of the system firmware.  And like a reboot
+          you can start any kernel with it, not just Linux.
+
+          The name comes from the similarity to the exec system call.
+
+          It is an ongoing process to be certain the hardware in a machine
+          is properly shut down, so do not be surprised if this code does not
+          initially work for you.  It may help to enable device hotplugging
+          support.  As of this writing the exact hardware interface is
+          strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+        bool "kernel crash dumps (EXPERIMENTAL)"
+        depends on EMBEDDED
+        depends on EXPERIMENTAL
+        depends on HIGHMEM
+        help
+          Generate crash dump after being started by kexec.
 endmenu
 
 
@@ -1226,6 +1264,15 @@ config SCx200
           This support is also available as a module. If compiled as a
           module, it will be called scx200.
 
+config HOTPLUG_CPU
+        bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+        depends on SMP && HOTPLUG && EXPERIMENTAL
+        ---help---
+          Say Y here to experiment with turning CPUs off and on.  CPUs
+          can be controlled through /sys/devices/system/cpu.
+
+          Say N.
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
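For context on the new KEXEC option: loading a replacement kernel is done from userspace (in practice with kexec-tools, e.g. kexec -l /boot/vmlinuz), and the actual jump into the staged image is requested through reboot(2). A minimal sketch of that last step, assuming an image has already been loaded; this is ordinary reboot(2) usage, not part of the patch itself:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>

int main(void)
{
        /* flush dirty data first: kexec does not shut userspace down */
        sync();

        /* LINUX_REBOOT_CMD_KEXEC boots the kernel staged by kexec_load() */
        if (syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
                    LINUX_REBOOT_CMD_KEXEC, NULL) < 0)
                perror("reboot(LINUX_REBOOT_CMD_KEXEC)");
        return 1;       /* reached only if the jump failed */
}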
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 1c36ca332a96..bf7c9ba709f3 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -17,6 +17,13 @@
 # 20050320  Kianusch Sayah Karadji <kianusch@sk-tech.net>
 #           Added support for GEODE CPU
 
+HAS_BIARCH      := $(call cc-option-yn, -m32)
+ifeq ($(HAS_BIARCH),y)
+AS              := $(AS) --32
+LD              := $(LD) -m elf_i386
+CC              := $(CC) -m32
+endif
+
 LDFLAGS         := -m elf_i386
 OBJCOPYFLAGS    := -O binary -R .note -R .comment -S
 LDFLAGS_vmlinux :=
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index aa7064a75ee6..1e71382d413a 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -25,8 +25,8 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
 #RAMDISK := -DRAMDISK=512
 
-targets         := vmlinux.bin bootsect bootsect.o setup setup.o \
-                   zImage bzImage
+targets         := vmlinux.bin bootsect bootsect.o \
+                   setup setup.o zImage bzImage
 subdir-         := compressed
 
 hostprogs-y     := tools/build
@@ -48,7 +48,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
 $(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
                               $(obj)/vmlinux.bin $(obj)/tools/build FORCE
         $(call if_changed,image)
-        @echo 'Kernel: $@ is ready'
+        @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
         $(call if_changed,objcopy)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index c5e80b69e7d4..b5893e4ecd37 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -25,6 +25,7 @@
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/page.h>
 
         .globl startup_32
 
@@ -74,7 +75,7 @@ startup_32:
         popl %esi       # discard address
         popl %esi       # real mode pointer
         xorl %ebx,%ebx
-        ljmp $(__BOOT_CS), $0x100000
+        ljmp $(__BOOT_CS), $__PHYSICAL_START
 
 /*
  * We come here, if we were loaded high.
@@ -99,7 +100,7 @@ startup_32:
         popl %ecx       # lcount
         popl %edx       # high_buffer_start
         popl %eax       # hcount
-        movl $0x100000,%edi
+        movl $__PHYSICAL_START,%edi
         cli             # make sure we don't get interrupted
         ljmp $(__BOOT_CS), $0x1000      # and jump to the move routine
 
@@ -124,5 +125,5 @@ move_routine_start:
         movsl
         movl %ebx,%esi  # Restore setup pointer
         xorl %ebx,%ebx
-        ljmp $(__BOOT_CS), $0x100000
+        ljmp $(__BOOT_CS), $__PHYSICAL_START
 move_routine_end:
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index cedc55cc47de..82a807f9f5e6 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -13,6 +13,7 @@
 #include <linux/vmalloc.h>
 #include <linux/tty.h>
 #include <asm/io.h>
+#include <asm/page.h>
 
 /*
  * gzip declarations
@@ -308,7 +309,7 @@ static void setup_normal_output_buffer(void)
 #else
         if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
-        output_data = (char *)0x100000; /* Points to 1M */
+        output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
         free_mem_end_ptr = (long)real_mode;
 }
 
@@ -333,8 +334,8 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
         low_buffer_size = low_buffer_end - LOW_BUFFER_START;
         high_loaded = 1;
         free_mem_end_ptr = (long)high_buffer_start;
-        if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
-                high_buffer_start = (uch *)(0x100000 + low_buffer_size);
+        if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
+                high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
                 mv->hcount = 0; /* say: we need not to move high_buffer */
         }
         else mv->hcount = -1;
@@ -353,7 +354,6 @@ static void close_output_buffer_if_we_run_high(struct moveparams *mv)
         }
 }
 
-
 asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
 {
         real_mode = rmode;
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
index 027d6b354ffb..d8d69f2b911d 100644
--- a/arch/i386/boot/edd.S
+++ b/arch/i386/boot/edd.S
@@ -6,7 +6,7 @@
  * projects 1572D, 1484D, 1386D, 1226DT
  * disk signature read by Matt Domsch <Matt_Domsch@dell.com>
  *      and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004
- * legacy CHS retreival by Patrick J. LoPresti <patl@users.sourceforge.net>
+ * legacy CHS retrieval by Patrick J. LoPresti <patl@users.sourceforge.net>
  *      March 2004
  * Command line option parsing, Matt Domsch, November 2004
  */
diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh
index 90f2452b3b9e..f17b40dfc0f4 100644
--- a/arch/i386/boot/install.sh
+++ b/arch/i386/boot/install.sh
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
 
 # Default install - same as make zlilo
 
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index caa1fde6904e..8cb420f40c58 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -33,7 +33,7 @@
  * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
  * <stiker@northlink.com>
  *
- * Fix to work around buggy BIOSes which dont use carry bit correctly
+ * Fix to work around buggy BIOSes which don't use carry bit correctly
  * and/or report extended memory in CX/DX for e801h memory size detection
  * call. As a result the kernel got wrong figures. The int15/e801h docs
  * from Ralf Brown interrupt list seem to indicate AX/BX should be used
@@ -357,7 +357,7 @@ bail820:
 
 meme801:
         stc                             # fix to work around buggy
-        xorw    %cx,%cx                 # BIOSes which dont clear/set
+        xorw    %cx,%cx                 # BIOSes which don't clear/set
         xorw    %dx,%dx                 # carry on pass/error of
                                         # e801h memory size call
                                         # or merely pass cx,dx though
@@ -847,7 +847,7 @@ flush_instr:
 #
 # but we yet haven't reloaded the CS register, so the default size
 # of the target offset still is 16 bit.
-# However, using an operand prefix (0x66), the CPU will properly 
+# However, using an operand prefix (0x66), the CPU will properly
 # take our 48 bit far pointer. (INTeL 80386 Programmer's Reference
 # Manual, Mixing 16-bit and 32-bit code, page 16-6)
 
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
index 26509b826aed..6835f6d47c31 100644
--- a/arch/i386/boot/tools/build.c
+++ b/arch/i386/boot/tools/build.c
@@ -1,6 +1,4 @@
 /*
- * $Id: build.c,v 1.5 1997/05/19 12:29:58 mj Exp $
- *
  * Copyright (C) 1991, 1992  Linus Torvalds
  * Copyright (C) 1997 Martin Mares
  */
@@ -8,7 +6,8 @@
 
 /*
  * This file builds a disk-image from three different files:
  *
- * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
+ * - bootsect: compatibility mbr which prints an error message if
+ *      someone tries to boot the kernel directly.
  * - setup: 8086 machine code, sets up system parm
  * - system: 80386 code for actual system
  *
@@ -71,7 +70,8 @@ void usage(void)
 
 int main(int argc, char ** argv)
 {
-        unsigned int i, c, sz, setup_sectors;
+        unsigned int i, sz, setup_sectors;
+        int c;
         u32 sys_size;
         byte major_root, minor_root;
         struct stat sb;
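The signedness split above matters because build.c stores byte counts from read() in c, and read() reports errors as -1; stored in an unsigned variable, that -1 becomes a huge positive count and the error branch is dead code. The pattern in isolation (a sketch of the idiom, not build.c itself):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[1024];
        int c;                  /* must be signed: read() may return -1 */

        while ((c = read(0, buf, sizeof(buf))) > 0)
                if (write(1, buf, c) != c)
                        return 1;
        if (c < 0) {            /* with an unsigned c this test never fires */
                perror("read");
                return 1;
        }
        return 0;
}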
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index 1019430fc1f1..88ee85c3b43b 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -59,7 +59,7 @@ struct aes_ctx {
 };
 
 #define WPOLY 0x011b
-#define u32_in(x) le32_to_cpu(*(const u32 *)(x))
+#define u32_in(x) le32_to_cpup((const __le32 *)(x))
 #define bytes2word(b0, b1, b2, b3)  \
         (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
 
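The u32_in() change keeps the same little-endian load but gives it the __le32 type that sparse endianness checking expects; le32_to_cpup() is the kernel helper for loading through a pointer. A stand-alone sketch of what such a load means, using an illustrative helper rather than the kernel API:

#include <stdint.h>
#include <stdio.h>

/* assemble the value from explicit byte positions, independent of host
 * byte order; on little-endian machines this compiles to a plain load */
static uint32_t le32_load(const unsigned char *p)
{
        return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
        const unsigned char buf[4] = { 0x78, 0x56, 0x34, 0x12 };
        printf("0x%08x\n", le32_load(buf)); /* prints 0x12345678 */
        return 0;
}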
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 28e620383799..ca07b95c06b8 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -126,7 +126,6 @@ CONFIG_HAVE_DEC_LOCK=y
 #
 CONFIG_PM=y
 CONFIG_SOFTWARE_SUSPEND=y
-# CONFIG_PM_DISK is not set
 
 #
 # ACPI (Advanced Configuration and Power Interface) Support
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603d..4cc83b322b36 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)    += apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)       += io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)  += reboot_fixups.o
+obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_X86_NUMAQ)         += numaq.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)   += summit.o
 obj-$(CONFIG_KPROBES)           += kprobes.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 848bb97af7ca..b7808a89d945 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -29,6 +29,7 @@
 #include <linux/efi.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -158,9 +159,15 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
 #endif
 
 #ifdef CONFIG_PCI_MMCONFIG
-static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 {
         struct acpi_table_mcfg *mcfg;
+        unsigned long i;
+        int config_size;
 
         if (!phys_addr || !size)
                 return -EINVAL;
@@ -171,18 +178,38 @@ static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
                 return -ENODEV;
         }
 
-        if (mcfg->base_reserved) {
-                printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+        /* how many config structures do we have */
+        pci_mmcfg_config_num = 0;
+        i = size - sizeof(struct acpi_table_mcfg);
+        while (i >= sizeof(struct acpi_table_mcfg_config)) {
+                ++pci_mmcfg_config_num;
+                i -= sizeof(struct acpi_table_mcfg_config);
+        };
+        if (pci_mmcfg_config_num == 0) {
+                printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
                 return -ENODEV;
         }
 
-        pci_mmcfg_base_addr = mcfg->base_address;
+        config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+        pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+        if (!pci_mmcfg_config) {
+                printk(KERN_WARNING PREFIX
+                       "No memory for MCFG config tables\n");
+                return -ENOMEM;
+        }
+
+        memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+        for (i = 0; i < pci_mmcfg_config_num; ++i) {
+                if (mcfg->config[i].base_reserved) {
+                        printk(KERN_ERR PREFIX
+                               "MMCONFIG not in low 4GB of memory\n");
+                        return -ENODEV;
+                }
+        }
 
         return 0;
 }
-#else
-#define acpi_parse_mcfg NULL
-#endif /* !CONFIG_PCI_MMCONFIG */
+#endif /* CONFIG_PCI_MMCONFIG */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init
@@ -506,6 +533,22 @@ acpi_unmap_lsapic(int cpu)
 EXPORT_SYMBOL(acpi_unmap_lsapic);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+int
+acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+        /* TBD */
+        return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+        /* TBD */
+        return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
 static unsigned long __init
 acpi_scan_rsdp (
         unsigned long           start,
@@ -815,6 +858,219 @@ acpi_process_madt(void)
         return;
 }
 
+extern int acpi_force;
+
+#ifdef __i386__
+
+#ifdef CONFIG_ACPI_PCI
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+        if (!acpi_force) {
+                printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+                       d->ident);
+                acpi_noirq_set();
+        }
+        return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+        if (!acpi_force) {
+                printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+                       d->ident);
+                acpi_disable_pci();
+        }
+        return 0;
+}
+#endif
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+        if (!acpi_force) {
+                printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
+                disable_acpi();
+        } else {
+                printk(KERN_NOTICE
+                       "Warning: DMI blacklist says broken, but acpi forced\n");
+        }
+        return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+        if (!acpi_force) {
+                printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
+                       d->ident);
+                disable_acpi();
+                acpi_ht = 1;
+        } else {
+                printk(KERN_NOTICE
+                       "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+        }
+        return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+        /*
+         * Boxes that need ACPI disabled
+         */
+        {
+                .callback = dmi_disable_acpi,
+                .ident = "IBM Thinkpad",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                        DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+                },
+        },
+
+        /*
+         * Boxes that need acpi=ht
+         */
+        {
+                .callback = force_acpi_ht,
+                .ident = "FSC Primergy T850",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "DELL GX240",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+                        DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "HP VISUALIZE NT Workstation",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "Compaq Workstation W8000",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "ASUS P4B266",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                        DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "ASUS P2B-DS",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                        DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "ASUS CUR-DLS",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                        DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "ABIT i440BX-W83977",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+                        DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "IBM Bladecenter",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                        DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "IBM eServer xSeries 360",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                        DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "IBM eserver xSeries 330",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                        DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+                },
+        },
+        {
+                .callback = force_acpi_ht,
+                .ident = "IBM eserver xSeries 440",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+                },
+        },
+
+#ifdef CONFIG_ACPI_PCI
+        /*
+         * Boxes that need ACPI PCI IRQ routing disabled
+         */
+        {
+                .callback = disable_acpi_irq,
+                .ident = "ASUS A7V",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+                        DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+                        /* newer BIOS, Revision 1011, does work */
+                        DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+                },
+        },
+
+        /*
+         * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+         */
+        {       /* _BBN 0 bug */
+                .callback = disable_acpi_pci,
+                .ident = "ASUS PR-DLS",
+                .matches = {
+                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                        DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+                        DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+                        DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+                },
+        },
+        {
+                .callback = disable_acpi_pci,
+                .ident = "Acer TravelMate 36x Laptop",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+                },
+        },
+#endif
+        { }
+};
+
+#endif /* __i386__ */
+
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  * called from setup_arch(), always.
@@ -843,6 +1099,10 @@ acpi_boot_table_init(void)
 {
         int error;
 
+#ifdef __i386__
+        dmi_check_system(acpi_dmi_table);
+#endif
+
         /*
          * If acpi_disabled, bail out
          * One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -870,8 +1130,6 @@ acpi_boot_table_init(void)
          */
         error = acpi_blacklisted();
         if (error) {
-                extern int acpi_force;
-
                 if (acpi_force) {
                         printk(KERN_WARNING PREFIX "acpi=force override\n");
                 } else {
@@ -907,7 +1165,6 @@ int __init acpi_boot_init(void)
         acpi_process_madt();
 
         acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
-        acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
 
         return 0;
 }
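The acpi_dmi_table added above is consumed by dmi_check_system(), which walks a zero-terminated table and runs the callback of every entry whose DMI_MATCH strings all appear in the firmware-reported identity. A self-contained miniature of that matching loop, with simplified stand-in types rather than the kernel's:

#include <stdio.h>
#include <string.h>

struct sys_id {
        int (*callback)(const struct sys_id *);
        const char *ident;
        const char *board_vendor;       /* NULL means don't care */
        const char *board_name;
};

static int apply_quirk(const struct sys_id *d)
{
        printf("%s detected: applying quirk\n", d->ident);
        return 0;
}

static const struct sys_id table[] = {
        { apply_quirk, "IBM Thinkpad", "IBM", "2629H1G" },
        { 0 }                           /* terminator, like the kernel table */
};

static int check_system(const char *vendor, const char *name)
{
        const struct sys_id *d;
        int hits = 0;

        for (d = table; d->callback; d++) {
                if (d->board_vendor && !strstr(vendor, d->board_vendor))
                        continue;
                if (d->board_name && !strstr(name, d->board_name))
                        continue;
                d->callback(d);
                hits++;
        }
        return hits;
}

int main(void)
{
        return check_system("IBM", "2629H1G") ? 0 : 1;
}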
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 28bb0514bb6e..c1af93032ff3 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
 
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
+#include <linux/dmi.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
@@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str)
 
 
 __setup("acpi_sleep=", acpi_sleep_setup);
+
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+        acpi_video_flags |= 2;
+        return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+        {       /* Reset video mode after returning from ACPI S3 sleep */
+                .callback = reset_videomode_after_s3,
+                .ident = "Toshiba Satellite 4030cdt",
+                .matches = {
+                        DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+                },
+        },
+        { }
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+        dmi_check_system(acpisleep_dmi_table);
+        return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index d509836b70c3..bd1dbf3bd223 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/cpu.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -34,12 +35,18 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/i8253.h>
 
 #include <mach_apic.h>
 
 #include "io_ports.h"
 
 /*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
  * Debug level
  */
 int apic_verbosity;
@@ -205,7 +212,7 @@ void __init connect_bsp_APIC(void)
         enable_apic_mode();
 }
 
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
 {
         if (pic_mode) {
                 /*
@@ -219,6 +226,42 @@ void disconnect_bsp_APIC(void)
                 outb(0x70, 0x22);
                 outb(0x00, 0x23);
         }
+        else {
+                /* Go back to Virtual Wire compatibility mode */
+                unsigned long value;
+
+                /* For the spurious interrupt use vector F, and enable it */
+                value = apic_read(APIC_SPIV);
+                value &= ~APIC_VECTOR_MASK;
+                value |= APIC_SPIV_APIC_ENABLED;
+                value |= 0xf;
+                apic_write_around(APIC_SPIV, value);
+
+                if (!virt_wire_setup) {
+                        /* For LVT0 make it edge triggered, active high, external and enabled */
+                        value = apic_read(APIC_LVT0);
+                        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+                                APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+                                APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+                        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+                        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+                        apic_write_around(APIC_LVT0, value);
+                }
+                else {
+                        /* Disable LVT0 */
+                        apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+                }
+
+                /* For LVT1 make it edge triggered, active high, nmi and enabled */
+                value = apic_read(APIC_LVT1);
+                value &= ~(
+                        APIC_MODE_MASK | APIC_SEND_PENDING |
+                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+                apic_write_around(APIC_LVT1, value);
+        }
 }
 
 void disable_local_APIC(void)
@@ -363,7 +406,7 @@ void __init init_bsp_APIC(void)
         apic_write_around(APIC_LVT1, value);
 }
 
-void __init setup_local_APIC (void)
+void __devinit setup_local_APIC(void)
 {
         unsigned long oldvalue, value, ver, maxlvt;
 
@@ -634,7 +677,7 @@ static struct sys_device device_lapic = {
         .cls            = &lapic_sysclass,
 };
 
-static void __init apic_pm_activate(void)
+static void __devinit apic_pm_activate(void)
 {
         apic_pm_state.active = 1;
 }
@@ -665,26 +708,6 @@ static void apic_pm_activate(void) { }
  * Original code written by Keir Fraser.
  */
 
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
-        enable_local_apic = -1;
-        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-        return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
-        enable_local_apic = 1;
-        return 0;
-}
-__setup("lapic", lapic_enable);
-
 static int __init apic_set_verbosity(char *str)
 {
         if (strcmp("debug", str) == 0)
@@ -855,9 +878,8 @@ fake_ioapic_page:
  * but we do not accept timer interrupts yet. We only allow the BP
  * to calibrate.
  */
-static unsigned int __init get_8254_timer_count(void)
+static unsigned int __devinit get_8254_timer_count(void)
 {
-        extern spinlock_t i8253_lock;
         unsigned long flags;
 
         unsigned int count;
@@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void)
 }
 
 /* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
+static void __devinit wait_8254_wraparound(void)
 {
         unsigned int curr_count, prev_count;
 
@@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void)
  * Default initialization for 8254 timers. If we use other timers like HPET,
  * we override this later
  */
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks)
         apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
 }
 
-static void __init setup_APIC_timer(unsigned int clocks)
+static void __devinit setup_APIC_timer(unsigned int clocks)
 {
         unsigned long flags;
 
@@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void)
         local_irq_enable();
 }
 
-void __init setup_secondary_APIC_clock(void)
+void __devinit setup_secondary_APIC_clock(void)
 {
         setup_APIC_timer(calibration_result);
 }
 
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
 {
         if (using_apic_timer) {
                 unsigned long v;
@@ -1133,7 +1155,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs)
         }
 
 #ifdef CONFIG_SMP
-                update_process_times(user_mode(regs));
+                update_process_times(user_mode_vm(regs));
 #endif
         }
 
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 0ff65abcd56c..064211d5f41b 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -228,10 +228,10 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/i8253.h>
 
 #include "io_ports.h"
 
-extern spinlock_t i8253_lock;
 extern unsigned long get_cmos_time(void);
 extern void machine_real_restart(unsigned char *, int);
 
@@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int);
 struct apm_user {
         int             magic;
         struct apm_user *次next;
-        int             suser: 1;
-        int             writer: 1;
-        int             reader: 1;
-        int             suspend_wait: 1;
+        unsigned int    suser: 1;
+        unsigned int    writer: 1;
+        unsigned int    reader: 1;
+        unsigned int    suspend_wait: 1;
         int             suspend_result;
         int             suspends_pending;
         int             standbys_pending;
@@ -1168,8 +1168,7 @@ static void get_time_diff(void)
 static void reinit_timer(void)
 {
 #ifdef INIT_TIMER_AFTER_SUSPEND
         unsigned long flags;
-        extern spinlock_t i8253_lock;
 
         spin_lock_irqsave(&i8253_lock, flags);
         /* set the clock to 100 Hz */
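The struct apm_user change above converts the 1-bit flags to unsigned because a signed 1-bit bitfield can hold only 0 and -1 on typical two's-complement compilers, so storing 1 and later comparing against 1 misbehaves. A stand-alone demonstration:

#include <stdio.h>

struct flags_signed   { int          suser:1; };
struct flags_unsigned { unsigned int suser:1; };

int main(void)
{
        struct flags_signed   s = { .suser = 1 };   /* typically stored as -1 */
        struct flags_unsigned u = { .suser = 1 };   /* stored as 1 */

        printf("signed:   value %d, == 1 is %s\n",
               s.suser, s.suser == 1 ? "true" : "false");
        printf("unsigned: value %d, == 1 is %s\n",
               u.suser, u.suser == 1 ? "true" : "false");
        return 0;
}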
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index d199e525680a..2203a9d20212 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 1;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
@@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-int __init get_model_name(struct cpuinfo_x86 *c)
+int __devinit get_model_name(struct cpuinfo_x86 *c)
 {
         unsigned int *v;
         char *p, *q;
@@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c)
 }
 
 
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
         unsigned int n, dummy, ecx, edx, l2size;
 
@@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c)
 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
 
 /* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
 {
         struct cpu_model_info *info;
 
@@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c)
 }
 
 
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 {
         char *v = c->x86_vendor_id;
         int i;
@@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag)
 
 
 /* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
 {
         return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -249,7 +249,7 @@ static void __init early_cpu_detect(void)
 #endif
 }
 
-void __init generic_identify(struct cpuinfo_x86 * c)
+void __devinit generic_identify(struct cpuinfo_x86 * c)
 {
         u32 tfms, xlvl;
         int junk;
@@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c)
         }
 }
 
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
         if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
                 /* Disable processor serial number */
@@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
 {
         int i;
 
@@ -432,10 +432,13 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_MCE
         mcheck_init(c);
 #endif
+        if (c == &boot_cpu_data)
+                sysenter_setup();
+        enable_sep_cpu();
 }
 
 #ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
         u32 eax, ebx, ecx, edx;
         int index_msb, tmp;
@@ -490,7 +493,7 @@ void __init detect_ht(struct cpuinfo_x86 *c)
 }
 #endif
 
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
 {
         char *vendor = NULL;
 
@@ -513,7 +516,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c)
         printk("\n");
 }
 
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
 
 /* This is hacky. :)
  * We're emulating future behavior.
@@ -560,7 +563,7 @@ void __init early_cpu_init(void)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-void __init cpu_init (void)
+void __devinit cpu_init(void)
 {
         int cpu = smp_processor_id();
         struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -635,7 +638,7 @@ void __init cpu_init (void)
 
         /* Clear all 6 debug registers: */
 
-#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+#define CD(register) set_debugreg(0, register)
 
         CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
 
@@ -648,3 +651,15 @@ void __init cpu_init (void)
         clear_used_math();
         mxcsr_feature_mask_init();
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+        int cpu = raw_smp_processor_id();
+        cpu_clear(cpu, cpu_initialized);
+
+        /* lazy TLB state */
+        per_cpu(cpu_tlbstate, cpu).state = 0;
+        per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 5c530064eb74..73a5dc5b26b8 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
         }
 #endif
 
-        if (powernow_table)
-                kfree(powernow_table);
-
+        kfree(powernow_table);
         return 0;
 }
 
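This cleanup (and the matching one in mtrr/generic.c below) drops the if (ptr) guard because kfree(NULL) is defined to be a no-op, exactly as free(NULL) is in userspace:

#include <stdlib.h>

int main(void)
{
        char *p = NULL;

        free(p);        /* guaranteed no-op for a null pointer */
        p = malloc(16);
        free(p);        /* normal release */
        free(NULL);     /* also fine, no guard needed */
        return 0;
}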
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 121aa2176e69..96a75d045835 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -28,7 +28,7 @@ extern int trap_init_f00f_bug(void);
 struct movsl_mask movsl_mask;
 #endif
 
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
 {
         if (c->x86_vendor != X86_VENDOR_INTEL)
                 return;
@@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c)
  * This is called before we do cpu ident work
  */
 
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
 {
         /* Uses data from early_cpu_detect now */
         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void)
  * P4 Xeon errata 037 workaround.
  * Hardware prefetcher may cause stale data to be loaded into the cache.
  */
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 {
         unsigned long lo, hi;
 
@@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
 /*
  * find out the number of processor cores on the die
  */
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
         unsigned int eax;
 
@@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c)
         return 1;
 }
 
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
 {
         unsigned int l2 = 0;
         char *p = NULL;
@@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
         return size;
 }
 
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
         .c_vendor       = "Intel",
         .c_ident        = { "GenuineIntel" },
         .c_models = {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index a710dc4eb20e..1d768b263269 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -28,7 +28,7 @@ struct _cache_table
 };
 
 /* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __initdata =
+static struct _cache_table cache_table[] __devinitdata =
 {
         { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
         { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
@@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void)
         return retval;
 }
 
-unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
         unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
         unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 8df52e86c4d2..c4abe7657397 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
 
 
 /* AMD K7 machine check is Intel like */
-void __init amd_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
         int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index bf6d1aefafc0..2cf25d2ba0f1 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
 
 #include "mce.h"
 
-int mce_disabled __initdata = 0;
+int mce_disabled __devinitdata = 0;
 int nr_mce_banks;
 
 EXPORT_SYMBOL_GPL(nr_mce_banks);        /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
 void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
 
 /* This has to be run for each processor */
-void __init mcheck_init(struct cpuinfo_x86 *c)
+void __devinit mcheck_init(struct cpuinfo_x86 *c)
 {
         if (mce_disabled==1)
                 return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8b16ceb929b4..0abccb6fdf9e 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
 }
 
 /* P4/Xeon Thermal regulation detect and init */
-static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
 {
         u32 l, h;
         unsigned int cpu = smp_processor_id();
@@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 }
 
 
-void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
         int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index c45a1b485c80..ec0614cd2925 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
 }
 
 /* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
 
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 46640f8c2494..f01b73f947e1 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 }
 
 /* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
         int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 753fa7acb984..7bae68fa168f 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
23} 23}
24 24
25/* Set up machine check reporting on the Winchip C6 series */ 25/* Set up machine check reporting on the Winchip C6 series */
26void __init winchip_mcheck_init(struct cpuinfo_x86 *c) 26void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
27{ 27{
28 u32 lo, hi; 28 u32 lo, hi;
29 machine_check_vector = winchip_machine_check; 29 machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f468a979e9aa..64d91f73a0a4 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -70,8 +70,7 @@ void __init get_mtrr_state(void)
70/* Free resources associated with a struct mtrr_state */ 70/* Free resources associated with a struct mtrr_state */
71void __init finalize_mtrr_state(void) 71void __init finalize_mtrr_state(void)
72{ 72{
73 if (mtrr_state.var_ranges) 73 kfree(mtrr_state.var_ranges);
74 kfree(mtrr_state.var_ranges);
75 mtrr_state.var_ranges = NULL; 74 mtrr_state.var_ranges = NULL;
76} 75}
77 76
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index e1c2042b9b7e..d66b09e0c820 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -375,6 +375,19 @@ int mtrr_add_page(unsigned long base, unsigned long size,
375 return error; 375 return error;
376} 376}
377 377
378static int mtrr_check(unsigned long base, unsigned long size)
379{
380 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
381 printk(KERN_WARNING
382 "mtrr: size and base must be multiples of 4 kiB\n");
383 printk(KERN_DEBUG
384 "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
385 dump_stack();
386 return -1;
387 }
388 return 0;
389}
390
378/** 391/**
379 * mtrr_add - Add a memory type region 392 * mtrr_add - Add a memory type region
380 * @base: Physical base address of region 393 * @base: Physical base address of region
@@ -415,11 +428,8 @@ int
415mtrr_add(unsigned long base, unsigned long size, unsigned int type, 428mtrr_add(unsigned long base, unsigned long size, unsigned int type,
416 char increment) 429 char increment)
417{ 430{
418 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 431 if (mtrr_check(base, size))
419 printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n");
420 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
421 return -EINVAL; 432 return -EINVAL;
422 }
423 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 433 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
424 increment); 434 increment);
425} 435}
@@ -511,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
511int 521int
512mtrr_del(int reg, unsigned long base, unsigned long size) 522mtrr_del(int reg, unsigned long base, unsigned long size)
513{ 523{
514 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 524 if (mtrr_check(base, size))
515 printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
516 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
517 return -EINVAL; 525 return -EINVAL;
518 }
519 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 526 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
520} 527}
521 528
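For illustration, a minimal sketch of how a caller satisfies the new mtrr_check() helper (not part of the patch; the base address and size are hypothetical, and both must be multiples of PAGE_SIZE or mtrr_add()/mtrr_del() now fail with -EINVAL via mtrr_check()):

#include <asm/mtrr.h>

static int example_map_framebuffer(void)
{
	int reg;

	/* 0xd0000000 and 0x800000 are 4 kiB aligned, so mtrr_check() passes */
	reg = mtrr_add(0xd0000000, 0x800000, MTRR_TYPE_WRCOMB, 1);
	if (reg < 0)
		return reg;	/* -EINVAL on misalignment, -ENOSPC when no MTRR is free */

	/* ... use the write-combining mapping ... */

	return mtrr_del(reg, 0xd0000000, 0x800000);
}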
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 7323c19f354e..8bd77d948a84 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
86 seq_printf(m, "stepping\t: unknown\n"); 86 seq_printf(m, "stepping\t: unknown\n");
87 87
88 if ( cpu_has(c, X86_FEATURE_TSC) ) { 88 if ( cpu_has(c, X86_FEATURE_TSC) ) {
89 seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", 89 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
90 cpu_khz / 1000, (cpu_khz % 1000)); 90 cpu_khz / 1000, (cpu_khz % 1000));
91 } 91 }
92 92
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index 2e2756345bb2..4647db4ad6de 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -45,7 +45,7 @@
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
46#include <asm/system.h> 46#include <asm/system.h>
47 47
48static struct class_simple *cpuid_class; 48static struct class *cpuid_class;
49 49
50#ifdef CONFIG_SMP 50#ifdef CONFIG_SMP
51 51
@@ -158,12 +158,12 @@ static struct file_operations cpuid_fops = {
158 .open = cpuid_open, 158 .open = cpuid_open,
159}; 159};
160 160
161static int cpuid_class_simple_device_add(int i) 161static int cpuid_class_device_create(int i)
162{ 162{
163 int err = 0; 163 int err = 0;
164 struct class_device *class_err; 164 struct class_device *class_err;
165 165
166 class_err = class_simple_device_add(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); 166 class_err = class_device_create(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i);
167 if (IS_ERR(class_err)) 167 if (IS_ERR(class_err))
168 err = PTR_ERR(class_err); 168 err = PTR_ERR(class_err);
169 return err; 169 return err;
@@ -175,10 +175,10 @@ static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsign
175 175
176 switch (action) { 176 switch (action) {
177 case CPU_ONLINE: 177 case CPU_ONLINE:
178 cpuid_class_simple_device_add(cpu); 178 cpuid_class_device_create(cpu);
179 break; 179 break;
180 case CPU_DEAD: 180 case CPU_DEAD:
181 class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); 181 class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
182 break; 182 break;
183 } 183 }
184 return NOTIFY_OK; 184 return NOTIFY_OK;
@@ -200,13 +200,13 @@ static int __init cpuid_init(void)
200 err = -EBUSY; 200 err = -EBUSY;
201 goto out; 201 goto out;
202 } 202 }
203 cpuid_class = class_simple_create(THIS_MODULE, "cpuid"); 203 cpuid_class = class_create(THIS_MODULE, "cpuid");
204 if (IS_ERR(cpuid_class)) { 204 if (IS_ERR(cpuid_class)) {
205 err = PTR_ERR(cpuid_class); 205 err = PTR_ERR(cpuid_class);
206 goto out_chrdev; 206 goto out_chrdev;
207 } 207 }
208 for_each_online_cpu(i) { 208 for_each_online_cpu(i) {
209 err = cpuid_class_simple_device_add(i); 209 err = cpuid_class_device_create(i);
210 if (err != 0) 210 if (err != 0)
211 goto out_class; 211 goto out_class;
212 } 212 }
@@ -218,9 +218,9 @@ static int __init cpuid_init(void)
218out_class: 218out_class:
219 i = 0; 219 i = 0;
220 for_each_online_cpu(i) { 220 for_each_online_cpu(i) {
221 class_simple_device_remove(MKDEV(CPUID_MAJOR, i)); 221 class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i));
222 } 222 }
223 class_simple_destroy(cpuid_class); 223 class_destroy(cpuid_class);
224out_chrdev: 224out_chrdev:
225 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); 225 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
226out: 226out:
@@ -232,8 +232,8 @@ static void __exit cpuid_exit(void)
232 int cpu = 0; 232 int cpu = 0;
233 233
234 for_each_online_cpu(cpu) 234 for_each_online_cpu(cpu)
235 class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); 235 class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
236 class_simple_destroy(cpuid_class); 236 class_destroy(cpuid_class);
237 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); 237 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
238 unregister_cpu_notifier(&cpuid_class_cpu_notifier); 238 unregister_cpu_notifier(&cpuid_class_cpu_notifier);
239} 239}
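For reference, the driver-model lifecycle the converted cpuid driver now follows, as a condensed sketch (2.6.13-era class API; the "example" names and device numbers are hypothetical, not from the patch):

#include <linux/device.h>
#include <linux/err.h>

static struct class *example_class;

static int __init example_init(void)
{
	example_class = class_create(THIS_MODULE, "example");
	if (IS_ERR(example_class))
		return PTR_ERR(example_class);
	/* one sysfs class device per node; returns an ERR_PTR on failure */
	class_device_create(example_class, MKDEV(42, 0), NULL, "example0");
	return 0;
}

static void __exit example_exit(void)
{
	class_device_destroy(example_class, MKDEV(42, 0));
	class_destroy(example_class);
}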
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 000000000000..e5fab12f7926
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,223 @@
1/*
2 * Architecture specific (i386) functions for kexec based crash dumps.
3 *
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/smp.h>
14#include <linux/irq.h>
15#include <linux/reboot.h>
16#include <linux/kexec.h>
17#include <linux/irq.h>
18#include <linux/delay.h>
19#include <linux/elf.h>
20#include <linux/elfcore.h>
21
22#include <asm/processor.h>
23#include <asm/hardirq.h>
24#include <asm/nmi.h>
25#include <asm/hw_irq.h>
26#include <asm/apic.h>
27#include <mach_ipi.h>
28
29
30note_buf_t crash_notes[NR_CPUS];
31/* This keeps track of which cpu is the crashing one. */
32static int crashing_cpu;
33
34static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
35 size_t data_len)
36{
37 struct elf_note note;
38
39 note.n_namesz = strlen(name) + 1;
40 note.n_descsz = data_len;
41 note.n_type = type;
42 memcpy(buf, &note, sizeof(note));
43 buf += (sizeof(note) +3)/4;
44 memcpy(buf, name, note.n_namesz);
45 buf += (note.n_namesz + 3)/4;
46 memcpy(buf, data, note.n_descsz);
47 buf += (note.n_descsz + 3)/4;
48
49 return buf;
50}
51
52static void final_note(u32 *buf)
53{
54 struct elf_note note;
55
56 note.n_namesz = 0;
57 note.n_descsz = 0;
58 note.n_type = 0;
59 memcpy(buf, &note, sizeof(note));
60}
61
62static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
63{
64 struct elf_prstatus prstatus;
65 u32 *buf;
66
67 if ((cpu < 0) || (cpu >= NR_CPUS))
68 return;
69
70 /* Using ELF notes here is opportunistic.
71 * I need a well defined structure format
72 * for the data I pass, and I need tags
73 * on the data to indicate what information I have
74 * squirrelled away. ELF notes happen to provide
75 * all of that, so there's no need to invent something new.
76 */
77 buf = &crash_notes[cpu][0];
78 memset(&prstatus, 0, sizeof(prstatus));
79 prstatus.pr_pid = current->pid;
80 elf_core_copy_regs(&prstatus.pr_reg, regs);
81 buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
82 sizeof(prstatus));
83 final_note(buf);
84}
85
86static void crash_get_current_regs(struct pt_regs *regs)
87{
88 __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
89 __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
90 __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
91 __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
92 __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
93 __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
94 __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
95 __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
96 __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
97 __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
98 __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
99 __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
100 __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
101
102 regs->eip = (unsigned long)current_text_addr();
103}
104
105/* The CPU does not save ss and esp on the stack if execution was
106 * already in kernel mode when the NMI occurred. This code fixes
107 * that up.
108 */
109static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
110{
111 memcpy(newregs, oldregs, sizeof(*newregs));
112 newregs->esp = (unsigned long)&(oldregs->esp);
113 __asm__ __volatile__("xorl %eax, %eax;");
114 __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
115}
116
117/* We may have saved_regs telling where the error came from,
118 * or NULL if we got here via a direct panic().
119 */
120static void crash_save_self(struct pt_regs *saved_regs)
121{
122 struct pt_regs regs;
123 int cpu;
124
125 cpu = smp_processor_id();
126 if (saved_regs)
127 crash_setup_regs(&regs, saved_regs);
128 else
129 crash_get_current_regs(&regs);
130 crash_save_this_cpu(&regs, cpu);
131}
132
133#ifdef CONFIG_SMP
134static atomic_t waiting_for_crash_ipi;
135
136static int crash_nmi_callback(struct pt_regs *regs, int cpu)
137{
138 struct pt_regs fixed_regs;
139
140 /* Don't do anything if this handler is invoked on the crashing cpu.
141 * Otherwise the system will completely hang. The crashing cpu can
142 * get an NMI if the system was booted with the nmi_watchdog parameter.
143 */
144 if (cpu == crashing_cpu)
145 return 1;
146 local_irq_disable();
147
148 if (!user_mode(regs)) {
149 crash_setup_regs(&fixed_regs, regs);
150 regs = &fixed_regs;
151 }
152 crash_save_this_cpu(regs, cpu);
153 disable_local_APIC();
154 atomic_dec(&waiting_for_crash_ipi);
155 /* Assume hlt works */
156 __asm__("hlt");
157 for(;;);
158
159 return 1;
160}
161
162/*
163 * By using the NMI code instead of a vector we just sneak through
164 * the word generator, coming out with just what we want. And it
165 * does not matter whether clustered_apic_mode is set or not.
166 */
167static void smp_send_nmi_allbutself(void)
168{
169 send_IPI_allbutself(APIC_DM_NMI);
170}
171
172static void nmi_shootdown_cpus(void)
173{
174 unsigned long msecs;
175
176 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
177 /* Would it be better to replace the trap vector here? */
178 set_nmi_callback(crash_nmi_callback);
179 /* Ensure the new callback function is set before sending
180 * out the NMI
181 */
182 wmb();
183
184 smp_send_nmi_allbutself();
185
186 msecs = 1000; /* Wait at most a second for the other cpus to stop */
187 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
188 mdelay(1);
189 msecs--;
190 }
191
192 /* Leave the nmi callback set */
193 disable_local_APIC();
194}
195#else
196static void nmi_shootdown_cpus(void)
197{
198 /* There are no cpus to shoot down */
199}
200#endif
201
202void machine_crash_shutdown(struct pt_regs *regs)
203{
204 /* This function is only called after the system
205 * has panicked or is otherwise in a critical state.
206 * The minimum amount of code to allow a kexec'd kernel
207 * to run successfully needs to happen here.
208 *
209 * In practice this means shooting down the other cpus in
210 * an SMP system.
211 */
212 /* The kernel is broken so disable interrupts */
213 local_irq_disable();
214
215 /* Make a note of the crashing cpu; it is used in the NMI callback. */
216 crashing_cpu = smp_processor_id();
217 nmi_shootdown_cpus();
218 lapic_shutdown();
219#if defined(CONFIG_X86_IO_APIC)
220 disable_IO_APIC();
221#endif
222 crash_save_self(regs);
223}
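A worked example of the note layout append_elf_note() produces (illustrative only; note_bytes() is a hypothetical helper restating the (len + 3)/4 rounding used above):

#include <linux/elf.h>

static inline unsigned int note_bytes(unsigned int namesz, unsigned int descsz)
{
	/* 12-byte header plus name and descriptor, each padded to 4 bytes */
	return sizeof(struct elf_note)
		+ ((namesz + 3) & ~3)
		+ ((descsz + 3) & ~3);
}

/*
 * For the NT_PRSTATUS note crash_save_this_cpu() writes:
 *   namesz = strlen("CORE") + 1 = 5, padded to 8
 *   descsz = sizeof(struct elf_prstatus)
 * so each per-cpu buffer needs 12 + 8 + descsz bytes, plus the empty
 * terminating note that final_note() appends.
 */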
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index 6ed7e28f306c..a3cdf894302b 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -1,22 +1,15 @@
1#include <linux/types.h> 1#include <linux/types.h>
2#include <linux/kernel.h>
3#include <linux/string.h> 2#include <linux/string.h>
4#include <linux/init.h> 3#include <linux/init.h>
5#include <linux/module.h> 4#include <linux/module.h>
6#include <linux/slab.h>
7#include <linux/acpi.h>
8#include <asm/io.h>
9#include <linux/pm.h>
10#include <asm/system.h>
11#include <linux/dmi.h> 5#include <linux/dmi.h>
12#include <linux/bootmem.h> 6#include <linux/bootmem.h>
13 7
14 8
15struct dmi_header 9struct dmi_header {
16{ 10 u8 type;
17 u8 type; 11 u8 length;
18 u8 length; 12 u16 handle;
19 u16 handle;
20}; 13};
21 14
22#undef DMI_DEBUG 15#undef DMI_DEBUG
@@ -29,15 +22,13 @@ struct dmi_header
29 22
30static char * __init dmi_string(struct dmi_header *dm, u8 s) 23static char * __init dmi_string(struct dmi_header *dm, u8 s)
31{ 24{
32 u8 *bp=(u8 *)dm; 25 u8 *bp = ((u8 *) dm) + dm->length;
33 bp+=dm->length; 26
34 if(!s) 27 if (!s)
35 return ""; 28 return "";
36 s--; 29 s--;
37 while(s>0 && *bp) 30 while (s > 0 && *bp) {
38 { 31 bp += strlen(bp) + 1;
39 bp+=strlen(bp);
40 bp++;
41 s--; 32 s--;
42 } 33 }
43 return bp; 34 return bp;
@@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
47 * We have to be cautious here. We have seen BIOSes with DMI pointers 38 * We have to be cautious here. We have seen BIOSes with DMI pointers
48 * pointing to completely the wrong place for example 39 * pointing to completely the wrong place for example
49 */ 40 */
50 41static int __init dmi_table(u32 base, int len, int num,
51static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) 42 void (*decode)(struct dmi_header *))
52{ 43{
53 u8 *buf; 44 u8 *buf, *data;
54 struct dmi_header *dm; 45 int i = 0;
55 u8 *data;
56 int i=0;
57 46
58 buf = bt_ioremap(base, len); 47 buf = bt_ioremap(base, len);
59 if(buf==NULL) 48 if (buf == NULL)
60 return -1; 49 return -1;
61 50
62 data = buf; 51 data = buf;
@@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm
65 * Stop when we see all the items the table claimed to have 54 * Stop when we see all the items the table claimed to have
66 * OR we run off the end of the table (also happens) 55 * OR we run off the end of the table (also happens)
67 */ 56 */
68 57 while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
69 while(i<num && data-buf+sizeof(struct dmi_header)<=len) 58 struct dmi_header *dm = (struct dmi_header *)data;
70 {
71 dm=(struct dmi_header *)data;
72 /* 59 /*
73 * We want to know the total length (formatted area and strings) 60 * We want to know the total length (formatted area and strings)
74 * before decoding to make sure we won't run off the table in 61 * before decoding to make sure we won't run off the table in
75 * dmi_decode or dmi_string 62 * dmi_decode or dmi_string
76 */ 63 */
77 data+=dm->length; 64 data += dm->length;
78 while(data-buf<len-1 && (data[0] || data[1])) 65 while ((data - buf < len - 1) && (data[0] || data[1]))
79 data++; 66 data++;
80 if(data-buf<len-1) 67 if (data - buf < len - 1)
81 decode(dm); 68 decode(dm);
82 data+=2; 69 data += 2;
83 i++; 70 i++;
84 } 71 }
85 bt_iounmap(buf, len); 72 bt_iounmap(buf, len);
86 return 0; 73 return 0;
87} 74}
88 75
89 76static int __init dmi_checksum(u8 *buf)
90inline static int __init dmi_checksum(u8 *buf)
91{ 77{
92 u8 sum=0; 78 u8 sum = 0;
93 int a; 79 int a;
94 80
95 for(a=0; a<15; a++) 81 for (a = 0; a < 15; a++)
96 sum+=buf[a]; 82 sum += buf[a];
97 return (sum==0); 83
84 return sum == 0;
98} 85}
99 86
100static int __init dmi_iterate(void (*decode)(struct dmi_header *)) 87static int __init dmi_iterate(void (*decode)(struct dmi_header *))
@@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
110 p = ioremap(0xF0000, 0x10000); 97 p = ioremap(0xF0000, 0x10000);
111 if (p == NULL) 98 if (p == NULL)
112 return -1; 99 return -1;
100
113 for (q = p; q < p + 0x10000; q += 16) { 101 for (q = p; q < p + 0x10000; q += 16) {
114 memcpy_fromio(buf, q, 15); 102 memcpy_fromio(buf, q, 15);
115 if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) 103 if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
116 { 104 u16 num = (buf[13] << 8) | buf[12];
117 u16 num=buf[13]<<8|buf[12]; 105 u16 len = (buf[7] << 8) | buf[6];
118 u16 len=buf[7]<<8|buf[6]; 106 u32 base = (buf[11] << 24) | (buf[10] << 16) |
119 u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8]; 107 (buf[9] << 8) | buf[8];
120 108
121 /* 109 /*
122 * DMI version 0.0 means that the real version is taken from 110 * DMI version 0.0 means that the real version is taken from
123 * the SMBIOS version, which we don't know at this point. 111 * the SMBIOS version, which we don't know at this point.
124 */ 112 */
125 if(buf[14]!=0) 113 if (buf[14] != 0)
126 printk(KERN_INFO "DMI %d.%d present.\n", 114 printk(KERN_INFO "DMI %d.%d present.\n",
127 buf[14]>>4, buf[14]&0x0F); 115 buf[14] >> 4, buf[14] & 0xF);
128 else 116 else
129 printk(KERN_INFO "DMI present.\n"); 117 printk(KERN_INFO "DMI present.\n");
118
130 dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", 119 dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
131 num, len)); 120 num, len));
132 dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", 121 dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
133 base)); 122
134 if(dmi_table(base,len, num, decode)==0) 123 if (dmi_table(base, len, num, decode) == 0)
135 return 0; 124 return 0;
136 } 125 }
137 } 126 }
@@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX];
143/* 132/*
144 * Save a DMI string 133 * Save a DMI string
145 */ 134 */
146
147static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) 135static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
148{ 136{
149 char *d = (char*)dm; 137 char *d = (char*)dm;
150 char *p = dmi_string(dm, d[string]); 138 char *p = dmi_string(dm, d[string]);
151 if(p==NULL || *p == 0) 139
140 if (p == NULL || *p == 0)
152 return; 141 return;
153 if (dmi_ident[slot]) 142 if (dmi_ident[slot])
154 return; 143 return;
155 dmi_ident[slot] = alloc_bootmem(strlen(p)+1); 144
145 dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
156 if(dmi_ident[slot]) 146 if(dmi_ident[slot])
157 strcpy(dmi_ident[slot], p); 147 strcpy(dmi_ident[slot], p);
158 else 148 else
@@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
160} 150}
161 151
162/* 152/*
163 * Ugly compatibility crap.
164 */
165#define dmi_blacklist dmi_system_id
166#define NO_MATCH { DMI_NONE, NULL}
167#define MATCH DMI_MATCH
168
169/*
170 * Toshiba keyboard likes to repeat keys when they are not repeated.
171 */
172
173static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
174{
175 printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
176 return 0;
177}
178
179
180#ifdef CONFIG_ACPI_SLEEP
181static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
182{
183 /* See acpi_wakeup.S */
184 extern long acpi_video_flags;
185 acpi_video_flags |= 2;
186 return 0;
187}
188#endif
189
190
191#ifdef CONFIG_ACPI_BOOT
192extern int acpi_force;
193
194static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
195{
196 if (!acpi_force) {
197 printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
198 disable_acpi();
199 } else {
200 printk(KERN_NOTICE
201 "Warning: DMI blacklist says broken, but acpi forced\n");
202 }
203 return 0;
204}
205
206/*
207 * Limit ACPI to CPU enumeration for HT
208 */
209static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
210{
211 if (!acpi_force) {
212 printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
213 disable_acpi();
214 acpi_ht = 1;
215 } else {
216 printk(KERN_NOTICE
217 "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
218 }
219 return 0;
220}
221#endif
222
223#ifdef CONFIG_ACPI_PCI
224static __init int disable_acpi_irq(struct dmi_blacklist *d)
225{
226 if (!acpi_force) {
227 printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
228 d->ident);
229 acpi_noirq_set();
230 }
231 return 0;
232}
233static __init int disable_acpi_pci(struct dmi_blacklist *d)
234{
235 if (!acpi_force) {
236 printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
237 d->ident);
238 acpi_disable_pci();
239 }
240 return 0;
241}
242#endif
243
244/*
245 * Process the DMI blacklists
246 */
247
248
249/*
250 * This will be expanded over time to force things like the APM
251 * interrupt mask settings according to the laptop
252 */
253
254static __initdata struct dmi_blacklist dmi_blacklist[]={
255
256 { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
257 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
258 NO_MATCH, NO_MATCH, NO_MATCH
259 } },
260#ifdef CONFIG_ACPI_SLEEP
261 { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
262 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
263 NO_MATCH, NO_MATCH, NO_MATCH
264 } },
265#endif
266
267#ifdef CONFIG_ACPI_BOOT
268 /*
269 * If your system is blacklisted here, but you find that acpi=force
270 * works for you, please contact acpi-devel@sourceforge.net
271 */
272
273 /*
274 * Boxes that need ACPI disabled
275 */
276
277 { dmi_disable_acpi, "IBM Thinkpad", {
278 MATCH(DMI_BOARD_VENDOR, "IBM"),
279 MATCH(DMI_BOARD_NAME, "2629H1G"),
280 NO_MATCH, NO_MATCH }},
281
282 /*
283 * Boxes that need acpi=ht
284 */
285
286 { force_acpi_ht, "FSC Primergy T850", {
287 MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
288 MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
289 NO_MATCH, NO_MATCH }},
290
291 { force_acpi_ht, "DELL GX240", {
292 MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
293 MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
294 NO_MATCH, NO_MATCH }},
295
296 { force_acpi_ht, "HP VISUALIZE NT Workstation", {
297 MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
298 MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
299 NO_MATCH, NO_MATCH }},
300
301 { force_acpi_ht, "Compaq Workstation W8000", {
302 MATCH(DMI_SYS_VENDOR, "Compaq"),
303 MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
304 NO_MATCH, NO_MATCH }},
305
306 { force_acpi_ht, "ASUS P4B266", {
307 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
308 MATCH(DMI_BOARD_NAME, "P4B266"),
309 NO_MATCH, NO_MATCH }},
310
311 { force_acpi_ht, "ASUS P2B-DS", {
312 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
313 MATCH(DMI_BOARD_NAME, "P2B-DS"),
314 NO_MATCH, NO_MATCH }},
315
316 { force_acpi_ht, "ASUS CUR-DLS", {
317 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
318 MATCH(DMI_BOARD_NAME, "CUR-DLS"),
319 NO_MATCH, NO_MATCH }},
320
321 { force_acpi_ht, "ABIT i440BX-W83977", {
322 MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
323 MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
324 NO_MATCH, NO_MATCH }},
325
326 { force_acpi_ht, "IBM Bladecenter", {
327 MATCH(DMI_BOARD_VENDOR, "IBM"),
328 MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
329 NO_MATCH, NO_MATCH }},
330
331 { force_acpi_ht, "IBM eServer xSeries 360", {
332 MATCH(DMI_BOARD_VENDOR, "IBM"),
333 MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
334 NO_MATCH, NO_MATCH }},
335
336 { force_acpi_ht, "IBM eserver xSeries 330", {
337 MATCH(DMI_BOARD_VENDOR, "IBM"),
338 MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
339 NO_MATCH, NO_MATCH }},
340
341 { force_acpi_ht, "IBM eserver xSeries 440", {
342 MATCH(DMI_BOARD_VENDOR, "IBM"),
343 MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
344 NO_MATCH, NO_MATCH }},
345
346#endif // CONFIG_ACPI_BOOT
347
348#ifdef CONFIG_ACPI_PCI
349 /*
350 * Boxes that need ACPI PCI IRQ routing disabled
351 */
352
353 { disable_acpi_irq, "ASUS A7V", {
354 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
355 MATCH(DMI_BOARD_NAME, "<A7V>"),
356 /* newer BIOS, Revision 1011, does work */
357 MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
358 NO_MATCH }},
359
360 /*
361 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
362 */
363 { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
364 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
365 MATCH(DMI_BOARD_NAME, "PR-DLS"),
366 MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
367 MATCH(DMI_BIOS_DATE, "03/21/2003") }},
368
369 { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
370 MATCH(DMI_SYS_VENDOR, "Acer"),
371 MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
372 NO_MATCH, NO_MATCH
373 } },
374
375#endif
376
377 { NULL, }
378};
379
380/*
381 * Process a DMI table entry. Right now all we care about are the BIOS 153 * Process a DMI table entry. Right now all we care about are the BIOS
382 * and machine entries. For 2.5 we should pull the smbus controller info 154 * and machine entries. For 2.5 we should pull the smbus controller info
383 * out of here. 155 * out of here.
384 */ 156 */
385
386static void __init dmi_decode(struct dmi_header *dm) 157static void __init dmi_decode(struct dmi_header *dm)
387{ 158{
388#ifdef DMI_DEBUG 159 u8 *data __attribute__((__unused__)) = (u8 *)dm;
389 u8 *data = (u8 *)dm;
390#endif
391 160
392 switch(dm->type) 161 switch(dm->type) {
393 { 162 case 0:
394 case 0: 163 dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
395 dmi_printk(("BIOS Vendor: %s\n", 164 dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
396 dmi_string(dm, data[4]))); 165 dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
397 dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); 166 dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
398 dmi_printk(("BIOS Version: %s\n", 167 dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
399 dmi_string(dm, data[5]))); 168 dmi_save_ident(dm, DMI_BIOS_DATE, 8);
400 dmi_save_ident(dm, DMI_BIOS_VERSION, 5); 169 break;
401 dmi_printk(("BIOS Release: %s\n", 170 case 1:
402 dmi_string(dm, data[8]))); 171 dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
403 dmi_save_ident(dm, DMI_BIOS_DATE, 8); 172 dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
404 break; 173 dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
405 case 1: 174 dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
406 dmi_printk(("System Vendor: %s\n", 175 dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
407 dmi_string(dm, data[4]))); 176 dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
408 dmi_save_ident(dm, DMI_SYS_VENDOR, 4); 177 dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
409 dmi_printk(("Product Name: %s\n", 178 dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
410 dmi_string(dm, data[5]))); 179 break;
411 dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); 180 case 2:
412 dmi_printk(("Version: %s\n", 181 dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
413 dmi_string(dm, data[6]))); 182 dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
414 dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); 183 dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
415 dmi_printk(("Serial Number: %s\n", 184 dmi_save_ident(dm, DMI_BOARD_NAME, 5);
416 dmi_string(dm, data[7]))); 185 dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
417 break; 186 dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
418 case 2: 187 break;
419 dmi_printk(("Board Vendor: %s\n",
420 dmi_string(dm, data[4])));
421 dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
422 dmi_printk(("Board Name: %s\n",
423 dmi_string(dm, data[5])));
424 dmi_save_ident(dm, DMI_BOARD_NAME, 5);
425 dmi_printk(("Board Version: %s\n",
426 dmi_string(dm, data[6])));
427 dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
428 break;
429 } 188 }
430} 189}
431 190
432void __init dmi_scan_machine(void) 191void __init dmi_scan_machine(void)
433{ 192{
434 int err = dmi_iterate(dmi_decode); 193 if (dmi_iterate(dmi_decode))
435 if(err == 0)
436 dmi_check_system(dmi_blacklist);
437 else
438 printk(KERN_INFO "DMI not present.\n"); 194 printk(KERN_INFO "DMI not present.\n");
439} 195}
440 196
@@ -470,7 +226,6 @@ fail: d++;
470 226
471 return count; 227 return count;
472} 228}
473
474EXPORT_SYMBOL(dmi_check_system); 229EXPORT_SYMBOL(dmi_check_system);
475 230
476/** 231/**
@@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system);
480 * Returns one DMI data value, can be used to perform 235 * Returns one DMI data value, can be used to perform
481 * complex DMI data checks. 236 * complex DMI data checks.
482 */ 237 */
483char * dmi_get_system_info(int field) 238char *dmi_get_system_info(int field)
484{ 239{
485 return dmi_ident[field]; 240 return dmi_ident[field];
486} 241}
487 242EXPORT_SYMBOL(dmi_get_system_info);
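To make the string-set walk in dmi_string() concrete, here is a stand-alone restatement (illustrative; string_at() and the sample strings are hypothetical). Each DMI structure's formatted area is followed by NUL-separated strings, referenced 1-based from the formatted area, with an extra NUL ending the set:

#include <linux/string.h>

static const char *string_at(const unsigned char *structure,
			     unsigned char formatted_len, unsigned char index)
{
	const char *s = (const char *)structure + formatted_len;

	if (!index)
		return "";		/* index 0 means "no string" */
	while (--index && *s)
		s += strlen(s) + 1;	/* hop over one NUL-terminated string */
	return s;			/* "" if the set ran out early */
}

So for a type-0 (BIOS information) structure followed by "Example Vendor\0" "1.02\0" "01/01/2005\0" "\0", string_at(dm, dm->length, 2) lands on "1.02", exactly as dmi_string(dm, data[5]) would.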
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index f732f427b418..385883ea8c19 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -30,6 +30,7 @@
30#include <linux/ioport.h> 30#include <linux/ioport.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/efi.h> 32#include <linux/efi.h>
33#include <linux/kexec.h>
33 34
34#include <asm/setup.h> 35#include <asm/setup.h>
35#include <asm/io.h> 36#include <asm/io.h>
@@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource,
598 if (md->type == EFI_CONVENTIONAL_MEMORY) { 599 if (md->type == EFI_CONVENTIONAL_MEMORY) {
599 request_resource(res, code_resource); 600 request_resource(res, code_resource);
600 request_resource(res, data_resource); 601 request_resource(res, data_resource);
602#ifdef CONFIG_KEXEC
603 request_resource(res, &crashk_res);
604#endif
601 } 605 }
602 } 606 }
603} 607}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index e966fc8c44c4..4477bb107098 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP
299 movl %eax,%cr0 299 movl %eax,%cr0
300 300
301 call check_x87 301 call check_x87
302 incb ready
303 lgdt cpu_gdt_descr 302 lgdt cpu_gdt_descr
304 lidt idt_descr 303 lidt idt_descr
305 ljmp $(__KERNEL_CS),$1f 304 ljmp $(__KERNEL_CS),$1f
@@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP
316 lldt %ax 315 lldt %ax
317 cld # gcc2 wants the direction flag cleared at all times 316 cld # gcc2 wants the direction flag cleared at all times
318#ifdef CONFIG_SMP 317#ifdef CONFIG_SMP
319 movb ready, %cl 318 movb ready, %cl
320 cmpb $1,%cl 319 movb $1, ready
320 cmpb $0,%cl
321 je 1f # the first CPU calls start_kernel 321 je 1f # the first CPU calls start_kernel
322 # all other CPUs call initialize_secondary 322 # all other CPUs call initialize_secondary
323 call initialize_secondary 323 call initialize_secondary
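The head.S hunk above reworks the "ready" handshake: instead of counting boot attempts with incb, each cpu reads the flag and then pins it to 1, so only the first cpu ever sees 0, and the flag stays valid if a cpu runs this path again (apparently with cpu hotplug in mind). An illustrative C analogue, not the actual assembly:

extern void start_kernel(void);
extern void initialize_secondary(void);

static char ready;

static void head_dispatch(void)
{
	char was_ready = ready;		/* movb ready, %cl */

	ready = 1;			/* movb $1, ready  */
	if (was_ready == 0)
		start_kernel();		/* only the first cpu sees 0 */
	else
		initialize_secondary();	/* all other cpus */
}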
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 903190a4b3ff..180f070d03cb 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -1,97 +1,17 @@
1#include <linux/config.h> 1#include <linux/config.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/elfcore.h>
6#include <linux/mca.h>
7#include <linux/sched.h>
8#include <linux/in6.h>
9#include <linux/interrupt.h>
10#include <linux/smp_lock.h>
11#include <linux/pm.h>
12#include <linux/pci.h>
13#include <linux/apm_bios.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/tty.h>
17#include <linux/highmem.h>
18#include <linux/time.h>
19
20#include <asm/semaphore.h>
21#include <asm/processor.h>
22#include <asm/i387.h>
23#include <asm/uaccess.h>
24#include <asm/checksum.h> 3#include <asm/checksum.h>
25#include <asm/io.h>
26#include <asm/delay.h>
27#include <asm/irq.h>
28#include <asm/mmx.h>
29#include <asm/desc.h> 4#include <asm/desc.h>
30#include <asm/pgtable.h>
31#include <asm/tlbflush.h>
32#include <asm/nmi.h>
33#include <asm/ist.h>
34#include <asm/kdebug.h>
35
36extern void dump_thread(struct pt_regs *, struct user *);
37extern spinlock_t rtc_lock;
38 5
39/* This is definitely a GPL-only symbol */ 6/* This is definitely a GPL-only symbol */
40EXPORT_SYMBOL_GPL(cpu_gdt_table); 7EXPORT_SYMBOL_GPL(cpu_gdt_table);
41 8
42#if defined(CONFIG_APM_MODULE)
43extern void machine_real_restart(unsigned char *, int);
44EXPORT_SYMBOL(machine_real_restart);
45extern void default_idle(void);
46EXPORT_SYMBOL(default_idle);
47#endif
48
49#ifdef CONFIG_SMP
50extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
51extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
52#endif
53
54#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
55extern struct drive_info_struct drive_info;
56EXPORT_SYMBOL(drive_info);
57#endif
58
59extern unsigned long cpu_khz;
60extern unsigned long get_cmos_time(void);
61
62/* platform dependent support */
63EXPORT_SYMBOL(boot_cpu_data);
64#ifdef CONFIG_DISCONTIGMEM
65EXPORT_SYMBOL(node_data);
66EXPORT_SYMBOL(physnode_map);
67#endif
68#ifdef CONFIG_X86_NUMAQ
69EXPORT_SYMBOL(xquad_portio);
70#endif
71EXPORT_SYMBOL(dump_thread);
72EXPORT_SYMBOL(dump_fpu);
73EXPORT_SYMBOL_GPL(kernel_fpu_begin);
74EXPORT_SYMBOL(__ioremap);
75EXPORT_SYMBOL(ioremap_nocache);
76EXPORT_SYMBOL(iounmap);
77EXPORT_SYMBOL(kernel_thread);
78EXPORT_SYMBOL(pm_idle);
79EXPORT_SYMBOL(pm_power_off);
80EXPORT_SYMBOL(get_cmos_time);
81EXPORT_SYMBOL(cpu_khz);
82EXPORT_SYMBOL(apm_info);
83
84EXPORT_SYMBOL(__down_failed); 9EXPORT_SYMBOL(__down_failed);
85EXPORT_SYMBOL(__down_failed_interruptible); 10EXPORT_SYMBOL(__down_failed_interruptible);
86EXPORT_SYMBOL(__down_failed_trylock); 11EXPORT_SYMBOL(__down_failed_trylock);
87EXPORT_SYMBOL(__up_wakeup); 12EXPORT_SYMBOL(__up_wakeup);
88/* Networking helper routines. */ 13/* Networking helper routines. */
89EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
90/* Delay loops */
91EXPORT_SYMBOL(__ndelay);
92EXPORT_SYMBOL(__udelay);
93EXPORT_SYMBOL(__delay);
94EXPORT_SYMBOL(__const_udelay);
95 15
96EXPORT_SYMBOL(__get_user_1); 16EXPORT_SYMBOL(__get_user_1);
97EXPORT_SYMBOL(__get_user_2); 17EXPORT_SYMBOL(__get_user_2);
@@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8);
105EXPORT_SYMBOL(strpbrk); 25EXPORT_SYMBOL(strpbrk);
106EXPORT_SYMBOL(strstr); 26EXPORT_SYMBOL(strstr);
107 27
108EXPORT_SYMBOL(strncpy_from_user);
109EXPORT_SYMBOL(__strncpy_from_user);
110EXPORT_SYMBOL(clear_user);
111EXPORT_SYMBOL(__clear_user);
112EXPORT_SYMBOL(__copy_from_user_ll);
113EXPORT_SYMBOL(__copy_to_user_ll);
114EXPORT_SYMBOL(strnlen_user);
115
116EXPORT_SYMBOL(dma_alloc_coherent);
117EXPORT_SYMBOL(dma_free_coherent);
118
119#ifdef CONFIG_PCI
120EXPORT_SYMBOL(pci_mem_start);
121#endif
122
123#ifdef CONFIG_PCI_BIOS
124EXPORT_SYMBOL(pcibios_set_irq_routing);
125EXPORT_SYMBOL(pcibios_get_irq_routing_table);
126#endif
127
128#ifdef CONFIG_X86_USE_3DNOW
129EXPORT_SYMBOL(_mmx_memcpy);
130EXPORT_SYMBOL(mmx_clear_page);
131EXPORT_SYMBOL(mmx_copy_page);
132#endif
133
134#ifdef CONFIG_X86_HT
135EXPORT_SYMBOL(smp_num_siblings);
136EXPORT_SYMBOL(cpu_sibling_map);
137#endif
138
139#ifdef CONFIG_SMP 28#ifdef CONFIG_SMP
140EXPORT_SYMBOL(cpu_data); 29extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
141EXPORT_SYMBOL(cpu_online_map); 30extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
142EXPORT_SYMBOL(cpu_callout_map);
143EXPORT_SYMBOL(__write_lock_failed); 31EXPORT_SYMBOL(__write_lock_failed);
144EXPORT_SYMBOL(__read_lock_failed); 32EXPORT_SYMBOL(__read_lock_failed);
145
146/* Global SMP stuff */
147EXPORT_SYMBOL(smp_call_function);
148
149/* TLB flushing */
150EXPORT_SYMBOL(flush_tlb_page);
151#endif
152
153#ifdef CONFIG_X86_IO_APIC
154EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
155#endif
156
157#ifdef CONFIG_MCA
158EXPORT_SYMBOL(machine_id);
159#endif
160
161#ifdef CONFIG_VT
162EXPORT_SYMBOL(screen_info);
163#endif
164
165EXPORT_SYMBOL(get_wchan);
166
167EXPORT_SYMBOL(rtc_lock);
168
169EXPORT_SYMBOL_GPL(set_nmi_callback);
170EXPORT_SYMBOL_GPL(unset_nmi_callback);
171
172EXPORT_SYMBOL(register_die_notifier);
173#ifdef CONFIG_HAVE_DEC_LOCK
174EXPORT_SYMBOL(_atomic_dec_and_lock);
175#endif
176
177EXPORT_SYMBOL(__PAGE_KERNEL);
178
179#ifdef CONFIG_HIGHMEM
180EXPORT_SYMBOL(kmap);
181EXPORT_SYMBOL(kunmap);
182EXPORT_SYMBOL(kmap_atomic);
183EXPORT_SYMBOL(kunmap_atomic);
184EXPORT_SYMBOL(kmap_atomic_to_page);
185#endif
186
187#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
188EXPORT_SYMBOL(ist_info);
189#endif 33#endif
190 34
191EXPORT_SYMBOL(csum_partial); 35EXPORT_SYMBOL(csum_partial);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
index c55e037f08f7..b817168d9c62 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/config.h> 11#include <linux/config.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/module.h>
13#include <asm/processor.h> 14#include <asm/processor.h>
14#include <asm/i387.h> 15#include <asm/i387.h>
15#include <asm/math_emu.h> 16#include <asm/math_emu.h>
@@ -79,6 +80,7 @@ void kernel_fpu_begin(void)
79 } 80 }
80 clts(); 81 clts();
81} 82}
83EXPORT_SYMBOL_GPL(kernel_fpu_begin);
82 84
83void restore_fpu( struct task_struct *tsk ) 85void restore_fpu( struct task_struct *tsk )
84{ 86{
@@ -526,6 +528,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
526 528
527 return fpvalid; 529 return fpvalid;
528} 530}
531EXPORT_SYMBOL(dump_fpu);
529 532
530int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) 533int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
531{ 534{
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 2c4813b47e57..178f4e9bac9d 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
268 return 0; 268 return 0;
269} 269}
270 270
271static int i8259A_shutdown(struct sys_device *dev)
272{
273 /* Put the i8259A into a quiescent state that
274 * the kernel initialization code can get it
275 * out of.
276 */
277 outb(0xff, 0x21); /* mask all of 8259A-1 */
278 outb(0xff, 0xA1); /* mask all of 8259A-2 */
279 return 0;
280}
281
271static struct sysdev_class i8259_sysdev_class = { 282static struct sysdev_class i8259_sysdev_class = {
272 set_kset_name("i8259"), 283 set_kset_name("i8259"),
273 .suspend = i8259A_suspend, 284 .suspend = i8259A_suspend,
274 .resume = i8259A_resume, 285 .resume = i8259A_resume,
286 .shutdown = i8259A_shutdown,
275}; 287};
276 288
277static struct sys_device device_i8259A = { 289static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 7a324e8b86f9..6578f40bd501 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -31,12 +31,13 @@
31#include <linux/mc146818rtc.h> 31#include <linux/mc146818rtc.h>
32#include <linux/compiler.h> 32#include <linux/compiler.h>
33#include <linux/acpi.h> 33#include <linux/acpi.h>
34 34#include <linux/module.h>
35#include <linux/sysdev.h> 35#include <linux/sysdev.h>
36#include <asm/io.h> 36#include <asm/io.h>
37#include <asm/smp.h> 37#include <asm/smp.h>
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40#include <asm/i8259.h>
40 41
41#include <mach_apic.h> 42#include <mach_apic.h>
42 43
@@ -573,12 +574,14 @@ static int balanced_irq(void *unused)
573 for ( ; ; ) { 574 for ( ; ; ) {
574 set_current_state(TASK_INTERRUPTIBLE); 575 set_current_state(TASK_INTERRUPTIBLE);
575 time_remaining = schedule_timeout(time_remaining); 576 time_remaining = schedule_timeout(time_remaining);
576 try_to_freeze(PF_FREEZE); 577 try_to_freeze();
577 if (time_after(jiffies, 578 if (time_after(jiffies,
578 prev_balance_time+balanced_irq_interval)) { 579 prev_balance_time+balanced_irq_interval)) {
580 preempt_disable();
579 do_irq_balance(); 581 do_irq_balance();
580 prev_balance_time = jiffies; 582 prev_balance_time = jiffies;
581 time_remaining = balanced_irq_interval; 583 time_remaining = balanced_irq_interval;
584 preempt_enable();
582 } 585 }
583 } 586 }
584 return 0; 587 return 0;
@@ -630,10 +633,8 @@ static int __init balanced_irq_init(void)
630 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); 633 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
631failed: 634failed:
632 for (i = 0; i < NR_CPUS; i++) { 635 for (i = 0; i < NR_CPUS; i++) {
633 if(irq_cpu_data[i].irq_delta) 636 kfree(irq_cpu_data[i].irq_delta);
634 kfree(irq_cpu_data[i].irq_delta); 637 kfree(irq_cpu_data[i].last_irq);
635 if(irq_cpu_data[i].last_irq)
636 kfree(irq_cpu_data[i].last_irq);
637 } 638 }
638 return 0; 639 return 0;
639} 640}
@@ -812,6 +813,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
812 } 813 }
813 return best_guess; 814 return best_guess;
814} 815}
816EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
815 817
816/* 818/*
817 * This function currently is only a helper for the i386 smp boot process where 819 * This function currently is only a helper for the i386 smp boot process where
@@ -1565,7 +1567,6 @@ void print_all_local_APICs (void)
1565 1567
1566void /*__init*/ print_PIC(void) 1568void /*__init*/ print_PIC(void)
1567{ 1569{
1568 extern spinlock_t i8259A_lock;
1569 unsigned int v; 1570 unsigned int v;
1570 unsigned long flags; 1571 unsigned long flags;
1571 1572
@@ -1633,12 +1634,43 @@ static void __init enable_IO_APIC(void)
1633 */ 1634 */
1634void disable_IO_APIC(void) 1635void disable_IO_APIC(void)
1635{ 1636{
1637 int pin;
1636 /* 1638 /*
1637 * Clear the IO-APIC before rebooting: 1639 * Clear the IO-APIC before rebooting:
1638 */ 1640 */
1639 clear_IO_APIC(); 1641 clear_IO_APIC();
1640 1642
1641 disconnect_bsp_APIC(); 1643 /*
1644 * If the i8259 is routed through an IOAPIC,
1645 * put that IOAPIC in virtual wire mode
1646 * so legacy interrupts can be delivered.
1647 */
1648 pin = find_isa_irq_pin(0, mp_ExtINT);
1649 if (pin != -1) {
1650 struct IO_APIC_route_entry entry;
1651 unsigned long flags;
1652
1653 memset(&entry, 0, sizeof(entry));
1654 entry.mask = 0; /* Enabled */
1655 entry.trigger = 0; /* Edge */
1656 entry.irr = 0;
1657 entry.polarity = 0; /* High */
1658 entry.delivery_status = 0;
1659 entry.dest_mode = 0; /* Physical */
1660 entry.delivery_mode = 7; /* ExtInt */
1661 entry.vector = 0;
1662 entry.dest.physical.physical_dest = 0;
1663
1664
1665 /*
1666 * Add it to the IO-APIC irq-routing table:
1667 */
1668 spin_lock_irqsave(&ioapic_lock, flags);
1669 io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
1670 io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
1671 spin_unlock_irqrestore(&ioapic_lock, flags);
1672 }
1673 disconnect_bsp_APIC(pin != -1);
1642} 1674}
1643 1675
1644/* 1676/*
@@ -1659,6 +1691,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
1659 unsigned long flags; 1691 unsigned long flags;
1660 1692
1661 /* 1693 /*
1694 * Don't check I/O APIC IDs for xAPIC systems. They have
1695 * no meaning without the serial APIC bus.
1696 */
1697 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
1698 return;
1699 /*
1662 * This is broken; anything with a real cpu count has to 1700 * This is broken; anything with a real cpu count has to
1663 * circumvent this idiocy regardless. 1701 * circumvent this idiocy regardless.
1664 */ 1702 */
@@ -1684,10 +1722,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
1684 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 1722 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
1685 } 1723 }
1686 1724
1687 /* Don't check I/O APIC IDs for some xAPIC systems. They have
1688 * no meaning without the serial APIC bus. */
1689 if (NO_IOAPIC_CHECK)
1690 continue;
1691 /* 1725 /*
1692 * Sanity check, is the ID really free? Every APIC in a 1726 * Sanity check, is the ID really free? Every APIC in a
1693 * system must have a unique ID or we get lots of nice 1727 * system must have a unique ID or we get lots of nice
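The redirection-table entry written by the new virtual-wire code in disable_IO_APIC() decomposes as follows; this bit-layout sketch of the low dword (matching the kernel's struct IO_APIC_route_entry) is for illustration only:

struct rte_low_sketch {
	unsigned int vector		: 8;	/* 0: ExtInt takes the vector from the 8259 */
	unsigned int delivery_mode	: 3;	/* 7 = ExtInt, i.e. pass-through delivery   */
	unsigned int dest_mode		: 1;	/* 0 = physical destination                 */
	unsigned int delivery_status	: 1;
	unsigned int polarity		: 1;	/* 0 = active high                          */
	unsigned int irr		: 1;
	unsigned int trigger		: 1;	/* 0 = edge                                 */
	unsigned int mask		: 1;	/* 0 = not masked, i.e. enabled             */
	unsigned int __reserved		: 15;
};

The high dword holds the physical destination APIC ID (0 here), and the two io_apic_write() calls above store exactly these two dwords at 0x10 + 2*pin and 0x11 + 2*pin.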
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c4..ce66dcc26d90 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18#include <linux/notifier.h>
19#include <linux/cpu.h>
20#include <linux/delay.h>
18 21
19DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; 22DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
20EXPORT_PER_CPU_SYMBOL(irq_stat); 23EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -153,6 +156,11 @@ void irq_ctx_init(int cpu)
153 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); 156 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
154} 157}
155 158
159void irq_ctx_exit(int cpu)
160{
161 hardirq_ctx[cpu] = NULL;
162}
163
156extern asmlinkage void __do_softirq(void); 164extern asmlinkage void __do_softirq(void);
157 165
158asmlinkage void do_softirq(void) 166asmlinkage void do_softirq(void)
@@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v)
210 218
211 if (i == 0) { 219 if (i == 0) {
212 seq_printf(p, " "); 220 seq_printf(p, " ");
213 for (j=0; j<NR_CPUS; j++) 221 for_each_cpu(j)
214 if (cpu_online(j)) 222 seq_printf(p, "CPU%d ",j);
215 seq_printf(p, "CPU%d ",j);
216 seq_putc(p, '\n'); 223 seq_putc(p, '\n');
217 } 224 }
218 225
@@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v)
225#ifndef CONFIG_SMP 232#ifndef CONFIG_SMP
226 seq_printf(p, "%10u ", kstat_irqs(i)); 233 seq_printf(p, "%10u ", kstat_irqs(i));
227#else 234#else
228 for (j = 0; j < NR_CPUS; j++) 235 for_each_cpu(j)
229 if (cpu_online(j)) 236 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
230 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
231#endif 237#endif
232 seq_printf(p, " %14s", irq_desc[i].handler->typename); 238 seq_printf(p, " %14s", irq_desc[i].handler->typename);
233 seq_printf(p, " %s", action->name); 239 seq_printf(p, " %s", action->name);
@@ -240,16 +246,14 @@ skip:
240 spin_unlock_irqrestore(&irq_desc[i].lock, flags); 246 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
241 } else if (i == NR_IRQS) { 247 } else if (i == NR_IRQS) {
242 seq_printf(p, "NMI: "); 248 seq_printf(p, "NMI: ");
243 for (j = 0; j < NR_CPUS; j++) 249 for_each_cpu(j)
244 if (cpu_online(j)) 250 seq_printf(p, "%10u ", nmi_count(j));
245 seq_printf(p, "%10u ", nmi_count(j));
246 seq_putc(p, '\n'); 251 seq_putc(p, '\n');
247#ifdef CONFIG_X86_LOCAL_APIC 252#ifdef CONFIG_X86_LOCAL_APIC
248 seq_printf(p, "LOC: "); 253 seq_printf(p, "LOC: ");
249 for (j = 0; j < NR_CPUS; j++) 254 for_each_cpu(j)
250 if (cpu_online(j)) 255 seq_printf(p, "%10u ",
251 seq_printf(p, "%10u ", 256 per_cpu(irq_stat,j).apic_timer_irqs);
252 per_cpu(irq_stat,j).apic_timer_irqs);
253 seq_putc(p, '\n'); 257 seq_putc(p, '\n');
254#endif 258#endif
255 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 259 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +263,45 @@ skip:
259 } 263 }
260 return 0; 264 return 0;
261} 265}
266
267#ifdef CONFIG_HOTPLUG_CPU
268#include <mach_apic.h>
269
270void fixup_irqs(cpumask_t map)
271{
272 unsigned int irq;
273 static int warned;
274
275 for (irq = 0; irq < NR_IRQS; irq++) {
276 cpumask_t mask;
277 if (irq == 2)
278 continue;
279
280 cpus_and(mask, irq_affinity[irq], map);
281 if (any_online_cpu(mask) == NR_CPUS) {
282 printk("Breaking affinity for irq %i\n", irq);
283 mask = map;
284 }
285 if (irq_desc[irq].handler->set_affinity)
286 irq_desc[irq].handler->set_affinity(irq, mask);
287 else if (irq_desc[irq].action && !(warned++))
288 printk("Cannot set affinity for irq %i\n", irq);
289 }
290
291#if 0
292 barrier();
293 /* Ingo Molnar says: "after the IO-APIC masks have been redirected
294 [note the nop - the interrupt-enable boundary on x86 is two
295 instructions from sti] - to flush out pending hardirqs and
296 IPIs. After this point nothing is supposed to reach this CPU." */
297 __asm__ __volatile__("sti; nop; cli");
298 barrier();
299#else
300 /* That doesn't seem sufficient. Give it 1ms. */
301 local_irq_enable();
302 mdelay(1);
303 local_irq_disable();
304#endif
305}
306#endif
307
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 59ff9b455069..fc8b17521761 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -23,6 +23,9 @@
23 * Rusty Russell). 23 * Rusty Russell).
24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes 24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
25 * interface to access function arguments. 25 * interface to access function arguments.
26 * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
27 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
28 * <prasanna@in.ibm.com> added function-return probes.
26 */ 29 */
27 30
28#include <linux/config.h> 31#include <linux/config.h>
@@ -30,15 +33,14 @@
30#include <linux/ptrace.h> 33#include <linux/ptrace.h>
31#include <linux/spinlock.h> 34#include <linux/spinlock.h>
32#include <linux/preempt.h> 35#include <linux/preempt.h>
36#include <asm/cacheflush.h>
33#include <asm/kdebug.h> 37#include <asm/kdebug.h>
34#include <asm/desc.h> 38#include <asm/desc.h>
35 39
36/* kprobe_status settings */
37#define KPROBE_HIT_ACTIVE 0x00000001
38#define KPROBE_HIT_SS 0x00000002
39
40static struct kprobe *current_kprobe; 40static struct kprobe *current_kprobe;
41static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; 41static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
42static struct kprobe *kprobe_prev;
43static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
42static struct pt_regs jprobe_saved_regs; 44static struct pt_regs jprobe_saved_regs;
43static long *jprobe_saved_esp; 45static long *jprobe_saved_esp;
44/* copy of the kernel stack at the probe fire time */ 46/* copy of the kernel stack at the probe fire time */
@@ -68,16 +70,50 @@ int arch_prepare_kprobe(struct kprobe *p)
68void arch_copy_kprobe(struct kprobe *p) 70void arch_copy_kprobe(struct kprobe *p)
69{ 71{
70 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 72 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
73 p->opcode = *p->addr;
71} 74}
72 75
73void arch_remove_kprobe(struct kprobe *p) 76void arch_arm_kprobe(struct kprobe *p)
74{ 77{
78 *p->addr = BREAKPOINT_INSTRUCTION;
79 flush_icache_range((unsigned long) p->addr,
80 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
75} 81}
76 82
77static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) 83void arch_disarm_kprobe(struct kprobe *p)
78{ 84{
79 *p->addr = p->opcode; 85 *p->addr = p->opcode;
80 regs->eip = (unsigned long)p->addr; 86 flush_icache_range((unsigned long) p->addr,
87 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
88}
89
90void arch_remove_kprobe(struct kprobe *p)
91{
92}
93
94static inline void save_previous_kprobe(void)
95{
96 kprobe_prev = current_kprobe;
97 kprobe_status_prev = kprobe_status;
98 kprobe_old_eflags_prev = kprobe_old_eflags;
99 kprobe_saved_eflags_prev = kprobe_saved_eflags;
100}
101
102static inline void restore_previous_kprobe(void)
103{
104 current_kprobe = kprobe_prev;
105 kprobe_status = kprobe_status_prev;
106 kprobe_old_eflags = kprobe_old_eflags_prev;
107 kprobe_saved_eflags = kprobe_saved_eflags_prev;
108}
109
110static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
111{
112 current_kprobe = p;
113 kprobe_saved_eflags = kprobe_old_eflags
114 = (regs->eflags & (TF_MASK | IF_MASK));
115 if (is_IF_modifier(p->opcode))
116 kprobe_saved_eflags &= ~IF_MASK;
81} 117}
82 118
83static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 119static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -91,6 +127,25 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
91 regs->eip = (unsigned long)&p->ainsn.insn; 127 regs->eip = (unsigned long)&p->ainsn.insn;
92} 128}
93 129
130void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
131{
132 unsigned long *sara = (unsigned long *)&regs->esp;
133 struct kretprobe_instance *ri;
134
135 if ((ri = get_free_rp_inst(rp)) != NULL) {
136 ri->rp = rp;
137 ri->task = current;
138 ri->ret_addr = (kprobe_opcode_t *) *sara;
139
140 /* Replace the return addr with trampoline addr */
141 *sara = (unsigned long) &kretprobe_trampoline;
142
143 add_rp_inst(ri);
144 } else {
145 rp->nmissed++;
146 }
147}
148
94/* 149/*
95 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 150 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
96 * remain disabled throughout this function. 151 * remain disabled throughout this function.
@@ -127,8 +182,18 @@ static int kprobe_handler(struct pt_regs *regs)
127 unlock_kprobes(); 182 unlock_kprobes();
128 goto no_kprobe; 183 goto no_kprobe;
129 } 184 }
130 disarm_kprobe(p, regs); 185 /* We have re-entered kprobe_handler() because
131 ret = 1; 186 * another probe was hit while we were inside it.
187 * Save the original kprobe variables and single-step
188 * the new probe's instruction without calling any
189 * user handlers.
190 */
191 save_previous_kprobe();
192 set_current_kprobe(p, regs);
193 p->nmissed++;
194 prepare_singlestep(p, regs);
195 kprobe_status = KPROBE_REENTER;
196 return 1;
132 } else { 197 } else {
133 p = current_kprobe; 198 p = current_kprobe;
134 if (p->break_handler && p->break_handler(p, regs)) { 199 if (p->break_handler && p->break_handler(p, regs)) {
@@ -163,11 +228,7 @@ static int kprobe_handler(struct pt_regs *regs)
163 } 228 }
164 229
165 kprobe_status = KPROBE_HIT_ACTIVE; 230 kprobe_status = KPROBE_HIT_ACTIVE;
166 current_kprobe = p; 231 set_current_kprobe(p, regs);
167 kprobe_saved_eflags = kprobe_old_eflags
168 = (regs->eflags & (TF_MASK | IF_MASK));
169 if (is_IF_modifier(p->opcode))
170 kprobe_saved_eflags &= ~IF_MASK;
171 232
172 if (p->pre_handler && p->pre_handler(p, regs)) 233 if (p->pre_handler && p->pre_handler(p, regs))
173 /* handler has already set things up, so skip ss setup */ 234 /* handler has already set things up, so skip ss setup */
@@ -184,6 +245,78 @@ no_kprobe:
184} 245}
185 246
186/* 247/*
248 * For function-return probes, init_kprobes() establishes a probepoint
249 * here. When a retprobed function returns, this probe is hit and
250 * trampoline_probe_handler() runs, calling the kretprobe's handler.
251 */
252 void kretprobe_trampoline_holder(void)
253 {
254 asm volatile ( ".global kretprobe_trampoline\n"
255 "kretprobe_trampoline: \n"
256 "nop\n");
257 }
258
259/*
260 * Called when we hit the probe point at kretprobe_trampoline
261 */
262int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
263{
264 struct kretprobe_instance *ri = NULL;
265 struct hlist_head *head;
266 struct hlist_node *node, *tmp;
267 unsigned long orig_ret_address = 0;
268 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
269
270 head = kretprobe_inst_table_head(current);
271
272 /*
273 * It is possible to have multiple instances associated with a given
274 * task, either because multiple functions in the call path
275 * have a return probe installed on them, and/or more than one
276 * return probe was registered for a target function.
277 *
278 * We can handle this because:
279 * - instances are always inserted at the head of the list
280 * - when multiple return probes are registered for the same
281 * function, the first instance's ret_addr will point to the
282 * real return address, and all the rest will point to
283 * kretprobe_trampoline
284 */
285 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
286 if (ri->task != current)
287 /* another task is sharing our hash bucket */
288 continue;
289
290 if (ri->rp && ri->rp->handler)
291 ri->rp->handler(ri, regs);
292
293 orig_ret_address = (unsigned long)ri->ret_addr;
294 recycle_rp_inst(ri);
295
296 if (orig_ret_address != trampoline_address)
297 /*
298 * This is the real return address. Any other
299 * instances associated with this task are for
300 * other calls deeper on the call stack
301 */
302 break;
303 }
304
305 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
306 regs->eip = orig_ret_address;
307
308 unlock_kprobes();
309 preempt_enable_no_resched();
310
311 /*
312 * By returning a non-zero value, we are telling
313 * kprobe_handler() that we have handled unlocking
314 * and re-enabling preemption.
315 */
316 return 1;
317}
318
319/*
187 * Called after single-stepping. p->addr is the address of the 320 * Called after single-stepping. p->addr is the address of the
188 * instruction whose first byte has been replaced by the "int 3" 321 * instruction whose first byte has been replaced by the "int 3"
189 * instruction. To avoid the SMP problems that can occur when we 322 * instruction. To avoid the SMP problems that can occur when we
@@ -263,13 +396,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
263 if (!kprobe_running()) 396 if (!kprobe_running())
264 return 0; 397 return 0;
265 398
266 if (current_kprobe->post_handler) 399 if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
400 kprobe_status = KPROBE_HIT_SSDONE;
267 current_kprobe->post_handler(current_kprobe, regs, 0); 401 current_kprobe->post_handler(current_kprobe, regs, 0);
402 }
268 403
269 resume_execution(current_kprobe, regs); 404 resume_execution(current_kprobe, regs);
270 regs->eflags |= kprobe_saved_eflags; 405 regs->eflags |= kprobe_saved_eflags;
271 406
407 /* Restore the original saved kprobe variables and continue. */
408 if (kprobe_status == KPROBE_REENTER) {
409 restore_previous_kprobe();
410 goto out;
411 }
272 unlock_kprobes(); 412 unlock_kprobes();
413out:
273 preempt_enable_no_resched(); 414 preempt_enable_no_resched();
274 415
275 /* 416 /*
@@ -390,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
390 } 531 }
391 return 0; 532 return 0;
392} 533}
534
535static struct kprobe trampoline_p = {
536 .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
537 .pre_handler = trampoline_probe_handler
538};
539
540int __init arch_init(void)
541{
542 return register_kprobe(&trampoline_p);
543}
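A minimal usage sketch for the kretprobe machinery above (a sketch, not part of the patch; it assumes the register_kretprobe()/unregister_kretprobe() API added elsewhere in this series, and the probed address and maxactive value are illustrative placeholders):

#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs at kretprobe_trampoline; on i386, regs->eax holds the return value. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned %ld\n", (long)regs->eax);
	return 0;
}

static struct kretprobe my_rp = {
	.handler   = ret_handler,
	.maxactive = 20,	/* kretprobe_instance slots on the free list */
};

static int __init rp_init(void)
{
	/* hypothetical address of the probed function, e.g. from System.map */
	my_rp.kp.addr = (kprobe_opcode_t *)0xc0123456;
	return register_kretprobe(&my_rp);
}

static void __exit rp_exit(void)
{
	unregister_kretprobe(&my_rp);
}

module_init(rp_init);
module_exit(rp_exit);
MODULE_LICENSE("GPL");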
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 000000000000..52ed18d8b511
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,226 @@
1/*
2 * machine_kexec.c - handle transition of Linux booting another kernel
3 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
4 *
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
7 */
8
9#include <linux/mm.h>
10#include <linux/kexec.h>
11#include <linux/delay.h>
12#include <asm/pgtable.h>
13#include <asm/pgalloc.h>
14#include <asm/tlbflush.h>
15#include <asm/mmu_context.h>
16#include <asm/io.h>
17#include <asm/apic.h>
18#include <asm/cpufeature.h>
19
20static inline unsigned long read_cr3(void)
21{
22 unsigned long cr3;
23 asm volatile("movl %%cr3,%0": "=r"(cr3));
24 return cr3;
25}
26
27#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
28
29#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
30#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
31#define L2_ATTR (_PAGE_PRESENT)
32
33#define LEVEL0_SIZE (1UL << 12UL)
34
35#ifndef CONFIG_X86_PAE
36#define LEVEL1_SIZE (1UL << 22UL)
37static u32 pgtable_level1[1024] PAGE_ALIGNED;
38
39static void identity_map_page(unsigned long address)
40{
41 unsigned long level1_index, level2_index;
42 u32 *pgtable_level2;
43
44 /* Find the current page table */
45 pgtable_level2 = __va(read_cr3());
46
47 /* Find the indexes of the physical address to identity map */
48 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
49 level2_index = address / LEVEL1_SIZE;
50
51 /* Identity map the page table entry */
52 pgtable_level1[level1_index] = address | L0_ATTR;
53 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
54
55 /* Flush the tlb so the new mapping takes effect.
56 * Global tlb entries are not flushed but that is not an issue.
57 */
58 load_cr3(pgtable_level2);
59}
60
61#else
62#define LEVEL1_SIZE (1UL << 21UL)
63#define LEVEL2_SIZE (1UL << 30UL)
64static u64 pgtable_level1[512] PAGE_ALIGNED;
65static u64 pgtable_level2[512] PAGE_ALIGNED;
66
67static void identity_map_page(unsigned long address)
68{
69 unsigned long level1_index, level2_index, level3_index;
70 u64 *pgtable_level3;
71
72 /* Find the current page table */
73 pgtable_level3 = __va(read_cr3());
74
75 /* Find the indexes of the physical address to identity map */
76 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
77 level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
78 level3_index = address / LEVEL2_SIZE;
79
80 /* Identity map the page table entry */
81 pgtable_level1[level1_index] = address | L0_ATTR;
82 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
83 set_64bit(&pgtable_level3[level3_index],
84 __pa(pgtable_level2) | L2_ATTR);
85
86 /* Flush the tlb so the new mapping takes effect.
87 * Global tlb entries are not flushed but that is not an issue.
88 */
89 load_cr3(pgtable_level3);
90}
91#endif
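A worked example of the index arithmetic above, as a self-contained sketch (the address is arbitrary). In the non-PAE case, for address 0x00403000: (0x00403000 % 4 MB) / 4 KB = 3, so PTE slot 3 of pgtable_level1 maps the page, and 0x00403000 / 4 MB = 1, so PDE slot 1 of the live page directory points at pgtable_level1:

#include <assert.h>

#define LEVEL0_SIZE (1UL << 12)		/* 4 KB page            */
#define LEVEL1_SIZE (1UL << 22)		/* 4 MB page-table span */

int main(void)
{
	unsigned long address = 0x00403000UL;
	unsigned long level1_index = (address % LEVEL1_SIZE) / LEVEL0_SIZE;
	unsigned long level2_index = address / LEVEL1_SIZE;

	assert(level1_index == 3);	/* PTE slot in pgtable_level1  */
	assert(level2_index == 1);	/* PDE slot in the current dir */
	return 0;
}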
92
93
94static void set_idt(void *newidt, __u16 limit)
95{
96 unsigned char curidt[6];
97
98 /* ia32 supports unaligned loads & stores */
99 (*(__u16 *)(curidt)) = limit;
100 (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
101
102 __asm__ __volatile__ (
103 "lidt %0\n"
104 : "=m" (curidt)
105 );
106};
107
108
109static void set_gdt(void *newgdt, __u16 limit)
110{
111 unsigned char curgdt[6];
112
113 /* ia32 supports unaligned loads & stores */
114 (*(__u16 *)(curgdt)) = limit;
115 (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
116
117 __asm__ __volatile__ (
118 "lgdt %0\n"
119 : "=m" (curgdt)
120 );
121};
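Both helpers build the 6-byte pseudo-descriptor that lidt/lgdt consume: a 16-bit limit followed by a 32-bit linear base, which is why the byte array is filled with unaligned 16- and 32-bit stores. The same layout as a packed struct, for illustration only:

struct pseudo_desc {
	unsigned short limit;	/* size of the table minus one        */
	unsigned int base;	/* 32-bit linear address of the table */
} __attribute__((packed));	/* exactly 6 bytes, no padding        */

machine_kexec() below exploits this by calling set_gdt(phys_to_virt(0), 0): a zero-limit table at linear address 0, so any later segment load would fault, which no longer matters once control passes to the new kernel.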
122
123static void load_segments(void)
124{
125#define __STR(X) #X
126#define STR(X) __STR(X)
127
128 __asm__ __volatile__ (
129 "\tljmp $"STR(__KERNEL_CS)",$1f\n"
130 "\t1:\n"
131 "\tmovl $"STR(__KERNEL_DS)",%eax\n"
132 "\tmovl %eax,%ds\n"
133 "\tmovl %eax,%es\n"
134 "\tmovl %eax,%fs\n"
135 "\tmovl %eax,%gs\n"
136 "\tmovl %eax,%ss\n"
137 );
138#undef STR
139#undef __STR
140}
141
142typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
143 unsigned long indirection_page,
144 unsigned long reboot_code_buffer,
145 unsigned long start_address,
146 unsigned int has_pae) ATTRIB_NORET;
147
148const extern unsigned char relocate_new_kernel[];
149extern void relocate_new_kernel_end(void);
150const extern unsigned int relocate_new_kernel_size;
151
152/*
153 * An architecture hook called to validate the
154 * proposed image and prepare the control pages
155 * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
156 * have been allocated, but the segments have not yet
157 * been copied into the kernel.
158 *
159 * Do whatever setup is needed on the image and the
160 * reboot code buffer to allow us to avoid allocations
161 * later.
162 *
163 * Currently nothing.
164 */
165int machine_kexec_prepare(struct kimage *image)
166{
167 return 0;
168}
169
170/*
171 * Undo anything leftover by machine_kexec_prepare
172 * when an image is freed.
173 */
174void machine_kexec_cleanup(struct kimage *image)
175{
176}
177
178/*
179 * Do not allocate memory (or fail in any way) in machine_kexec().
180 * We are past the point of no return, committed to rebooting now.
181 */
182NORET_TYPE void machine_kexec(struct kimage *image)
183{
184 unsigned long page_list;
185 unsigned long reboot_code_buffer;
186
187 relocate_new_kernel_t rnk;
188
189 /* Interrupts aren't acceptable while we reboot */
190 local_irq_disable();
191
192 /* Compute some offsets */
193 reboot_code_buffer = page_to_pfn(image->control_code_page)
194 << PAGE_SHIFT;
195 page_list = image->head;
196
197 /* Set up an identity mapping for the reboot_code_buffer */
198 identity_map_page(reboot_code_buffer);
199
200 /* copy it out */
201 memcpy((void *)reboot_code_buffer, relocate_new_kernel,
202 relocate_new_kernel_size);
203
204 /* The segment registers are funny things: they are
205 * automatically loaded from a table in memory whenever you
206 * set them to a specific selector, but this table is never
207 * accessed again until you set the segment to a different selector.
208 *
209 * The more common model is a cache where the behind-the-
210 * scenes work is done, but which is also dropped at arbitrary
211 * times.
212 *
213 * I take advantage of this here by force-loading the
214 * segments before I zap the gdt with an invalid value.
215 */
216 load_segments();
217 /* The gdt & idt are now invalid.
218 * If you want to load them you must set up your own idt & gdt.
219 */
220 set_gdt(phys_to_virt(0),0);
221 set_idt(phys_to_virt(0),0);
222
223 /* now call it */
224 rnk = (relocate_new_kernel_t) reboot_code_buffer;
225 (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
226}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 1347ab4939e7..af917f609c7d 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -67,7 +67,6 @@ unsigned long mp_lapic_addr;
67 67
68/* Processor that is doing the boot up */ 68/* Processor that is doing the boot up */
69unsigned int boot_cpu_physical_apicid = -1U; 69unsigned int boot_cpu_physical_apicid = -1U;
70unsigned int boot_cpu_logical_apicid = -1U;
71/* Internal processor count */ 70/* Internal processor count */
72static unsigned int __initdata num_processors; 71static unsigned int __initdata num_processors;
73 72
@@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
180 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 179 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
181 Dprintk(" Bootup CPU\n"); 180 Dprintk(" Bootup CPU\n");
182 boot_cpu_physical_apicid = m->mpc_apicid; 181 boot_cpu_physical_apicid = m->mpc_apicid;
183 boot_cpu_logical_apicid = apicid;
184 } 182 }
185 183
186 if (num_processors >= NR_CPUS) { 184 if (num_processors >= NR_CPUS) {
@@ -914,7 +912,10 @@ void __init mp_register_ioapic (
914 mp_ioapics[idx].mpc_apicaddr = address; 912 mp_ioapics[idx].mpc_apicaddr = address;
915 913
916 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 914 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
917 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); 915 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
916 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
917 else
918 mp_ioapics[idx].mpc_apicid = id;
918 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 919 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
919 920
920 /* 921 /*
@@ -1055,11 +1056,20 @@ void __init mp_config_acpi_legacy_irqs (void)
1055 } 1056 }
1056} 1057}
1057 1058
1059#define MAX_GSI_NUM 4096
1060
1058int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) 1061int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
1059{ 1062{
1060 int ioapic = -1; 1063 int ioapic = -1;
1061 int ioapic_pin = 0; 1064 int ioapic_pin = 0;
1062 int idx, bit = 0; 1065 int idx, bit = 0;
1066 static int pci_irq = 16;
1067 /*
1068 * Mapping between Global System Interrupts, which
1069 * represent all possible interrupts, and IRQs
1070 * assigned to actual devices.
1071 */
1072 static int gsi_to_irq[MAX_GSI_NUM];
1063 1073
1064#ifdef CONFIG_ACPI_BUS 1074#ifdef CONFIG_ACPI_BUS
1065 /* Don't set up the ACPI SCI because it's already set up */ 1075 /* Don't set up the ACPI SCI because it's already set up */
@@ -1094,11 +1104,26 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
1094 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { 1104 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
1095 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 1105 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
1096 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 1106 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
1097 return gsi; 1107 return gsi_to_irq[gsi];
1098 } 1108 }
1099 1109
1100 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); 1110 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
1101 1111
1112 if (edge_level) {
1113 /*
1114 * For PCI devices assign IRQs in order, avoiding gaps
1115 * due to unused I/O APIC pins.
1116 */
1117 int irq = gsi;
1118 if (gsi < MAX_GSI_NUM) {
1119 gsi = pci_irq++;
1120 gsi_to_irq[irq] = gsi;
1121 } else {
1122 printk(KERN_ERR "GSI %u is too high\n", gsi);
1123 return gsi;
1124 }
1125 }
1126
1102 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, 1127 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
1103 edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, 1128 edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
1104 active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); 1129 active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
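A self-contained sketch of the compaction above (sample GSI numbers are made up): level-triggered GSIs — typically PCI — are renumbered densely from 16 upward, while the identity mapping is kept otherwise:

#include <stdio.h>

#define MAX_GSI_NUM 4096

static int pci_irq = 16;		/* next free IRQ for PCI devices */
static int gsi_to_irq[MAX_GSI_NUM];	/* remembers prior assignments   */

static int register_gsi(unsigned int gsi, int level_triggered)
{
	if (!level_triggered)
		return gsi;		/* edge (ISA) IRQs keep their number */
	gsi_to_irq[gsi] = pci_irq;
	return pci_irq++;
}

int main(void)
{
	/* three PCI devices on scattered I/O APIC pins 16, 20 and 21 */
	printf("%d %d %d\n", register_gsi(16, 1),
	       register_gsi(20, 1),
	       register_gsi(21, 1));	/* prints: 16 17 18 */
	return 0;
}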
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 05d9f8f363a6..b2f03c39a6fe 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -44,7 +44,7 @@
44#include <asm/uaccess.h> 44#include <asm/uaccess.h>
45#include <asm/system.h> 45#include <asm/system.h>
46 46
47static struct class_simple *msr_class; 47static struct class *msr_class;
48 48
49/* Note: "err" is handled in a funny way below. Otherwise one version 49/* Note: "err" is handled in a funny way below. Otherwise one version
50 of gcc or another breaks. */ 50 of gcc or another breaks. */
@@ -260,12 +260,12 @@ static struct file_operations msr_fops = {
260 .open = msr_open, 260 .open = msr_open,
261}; 261};
262 262
263static int msr_class_simple_device_add(int i) 263static int msr_class_device_create(int i)
264{ 264{
265 int err = 0; 265 int err = 0;
266 struct class_device *class_err; 266 struct class_device *class_err;
267 267
268 class_err = class_simple_device_add(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); 268 class_err = class_device_create(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i);
269 if (IS_ERR(class_err)) 269 if (IS_ERR(class_err))
270 err = PTR_ERR(class_err); 270 err = PTR_ERR(class_err);
271 return err; 271 return err;
@@ -277,10 +277,10 @@ static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned
277 277
278 switch (action) { 278 switch (action) {
279 case CPU_ONLINE: 279 case CPU_ONLINE:
280 msr_class_simple_device_add(cpu); 280 msr_class_device_create(cpu);
281 break; 281 break;
282 case CPU_DEAD: 282 case CPU_DEAD:
283 class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); 283 class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
284 break; 284 break;
285 } 285 }
286 return NOTIFY_OK; 286 return NOTIFY_OK;
@@ -302,13 +302,13 @@ static int __init msr_init(void)
302 err = -EBUSY; 302 err = -EBUSY;
303 goto out; 303 goto out;
304 } 304 }
305 msr_class = class_simple_create(THIS_MODULE, "msr"); 305 msr_class = class_create(THIS_MODULE, "msr");
306 if (IS_ERR(msr_class)) { 306 if (IS_ERR(msr_class)) {
307 err = PTR_ERR(msr_class); 307 err = PTR_ERR(msr_class);
308 goto out_chrdev; 308 goto out_chrdev;
309 } 309 }
310 for_each_online_cpu(i) { 310 for_each_online_cpu(i) {
311 err = msr_class_simple_device_add(i); 311 err = msr_class_device_create(i);
312 if (err != 0) 312 if (err != 0)
313 goto out_class; 313 goto out_class;
314 } 314 }
@@ -320,8 +320,8 @@ static int __init msr_init(void)
320out_class: 320out_class:
321 i = 0; 321 i = 0;
322 for_each_online_cpu(i) 322 for_each_online_cpu(i)
323 class_simple_device_remove(MKDEV(MSR_MAJOR, i)); 323 class_device_destroy(msr_class, MKDEV(MSR_MAJOR, i));
324 class_simple_destroy(msr_class); 324 class_destroy(msr_class);
325out_chrdev: 325out_chrdev:
326 unregister_chrdev(MSR_MAJOR, "cpu/msr"); 326 unregister_chrdev(MSR_MAJOR, "cpu/msr");
327out: 327out:
@@ -332,8 +332,8 @@ static void __exit msr_exit(void)
332{ 332{
333 int cpu = 0; 333 int cpu = 0;
334 for_each_online_cpu(cpu) 334 for_each_online_cpu(cpu)
335 class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); 335 class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
336 class_simple_destroy(msr_class); 336 class_destroy(msr_class);
337 unregister_chrdev(MSR_MAJOR, "cpu/msr"); 337 unregister_chrdev(MSR_MAJOR, "cpu/msr");
338 unregister_cpu_notifier(&msr_class_cpu_notifier); 338 unregister_cpu_notifier(&msr_class_cpu_notifier);
339} 339}
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 2c0ee9c2d020..da6c46d667cb 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -28,8 +28,7 @@
28#include <linux/sysctl.h> 28#include <linux/sysctl.h>
29 29
30#include <asm/smp.h> 30#include <asm/smp.h>
31#include <asm/mtrr.h> 31#include <asm/div64.h>
32#include <asm/mpspec.h>
33#include <asm/nmi.h> 32#include <asm/nmi.h>
34 33
35#include "mach_traps.h" 34#include "mach_traps.h"
@@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n)
324 wrmsr(base+i, 0, 0); 323 wrmsr(base+i, 0, 0);
325} 324}
326 325
326static inline void write_watchdog_counter(const char *descr)
327{
328 u64 count = (u64)cpu_khz * 1000;
329
330 do_div(count, nmi_hz);
331 if (descr)
332 Dprintk("setting %s to -0x%08Lx\n", descr, count);
333 wrmsrl(nmi_perfctr_msr, 0 - count);
334}
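The arithmetic, worked through as a sketch (the 2 GHz clock and nmi_hz value are illustrative assumptions): the perf counter is programmed to minus the number of cycles per NMI period, so it overflows — raising the watchdog NMI — every 1/nmi_hz seconds:

#include <stdio.h>

int main(void)
{
	unsigned long long cpu_khz = 2000000ULL;	/* 2 GHz CPU       */
	unsigned long long nmi_hz = 1ULL;		/* one NMI per sec */
	unsigned long long count = cpu_khz * 1000 / nmi_hz;

	/* wrmsrl(msr, 0 - count) starts the counter 2e9 ticks below
	 * overflow, so it wraps (and fires) after exactly one second. */
	printf("setting counter to -0x%08llx\n", count);
	return 0;
}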
335
327static void setup_k7_watchdog(void) 336static void setup_k7_watchdog(void)
328{ 337{
329 unsigned int evntsel; 338 unsigned int evntsel;
@@ -339,8 +348,7 @@ static void setup_k7_watchdog(void)
339 | K7_NMI_EVENT; 348 | K7_NMI_EVENT;
340 349
341 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 350 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
342 Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); 351 write_watchdog_counter("K7_PERFCTR0");
343 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
344 apic_write(APIC_LVTPC, APIC_DM_NMI); 352 apic_write(APIC_LVTPC, APIC_DM_NMI);
345 evntsel |= K7_EVNTSEL_ENABLE; 353 evntsel |= K7_EVNTSEL_ENABLE;
346 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 354 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
@@ -361,8 +369,7 @@ static void setup_p6_watchdog(void)
361 | P6_NMI_EVENT; 369 | P6_NMI_EVENT;
362 370
363 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); 371 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
364 Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); 372 write_watchdog_counter("P6_PERFCTR0");
365 wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
366 apic_write(APIC_LVTPC, APIC_DM_NMI); 373 apic_write(APIC_LVTPC, APIC_DM_NMI);
367 evntsel |= P6_EVNTSEL0_ENABLE; 374 evntsel |= P6_EVNTSEL0_ENABLE;
368 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); 375 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
@@ -402,8 +409,7 @@ static int setup_p4_watchdog(void)
402 409
403 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); 410 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
404 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); 411 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
405 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); 412 write_watchdog_counter("P4_IQ_COUNTER0");
406 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
407 apic_write(APIC_LVTPC, APIC_DM_NMI); 413 apic_write(APIC_LVTPC, APIC_DM_NMI);
408 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 414 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
409 return 1; 415 return 1;
@@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
518 * other P6 variant */ 524 * other P6 variant */
519 apic_write(APIC_LVTPC, APIC_DM_NMI); 525 apic_write(APIC_LVTPC, APIC_DM_NMI);
520 } 526 }
521 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); 527 write_watchdog_counter(NULL);
522 } 528 }
523} 529}
524 530
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 4de2e03c7b45..1e51427cc9eb 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -11,6 +11,7 @@
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/module.h>
14#include <asm/io.h> 15#include <asm/io.h>
15 16
16struct dma_coherent_mem { 17struct dma_coherent_mem {
@@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
54 } 55 }
55 return ret; 56 return ret;
56} 57}
58EXPORT_SYMBOL(dma_alloc_coherent);
57 59
58void dma_free_coherent(struct device *dev, size_t size, 60void dma_free_coherent(struct device *dev, size_t size,
59 void *vaddr, dma_addr_t dma_handle) 61 void *vaddr, dma_addr_t dma_handle)
@@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size,
68 } else 70 } else
69 free_pages((unsigned long)vaddr, order); 71 free_pages((unsigned long)vaddr, order);
70} 72}
73EXPORT_SYMBOL(dma_free_coherent);
71 74
72int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, 75int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
73 dma_addr_t device_addr, size_t size, int flags) 76 dma_addr_t device_addr, size_t size, int flags)
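With these exports, modular drivers can use the coherent DMA API on i386. A hedged driver-side sketch (the device pointer and PAGE_SIZE buffer are placeholders, and the device-register write is hypothetical):

#include <linux/mm.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>

static void *ring;		/* CPU virtual address of the buffer */
static dma_addr_t ring_dma;	/* bus address to hand to the device */

static int setup_ring(struct device *dev)
{
	ring = dma_alloc_coherent(dev, PAGE_SIZE, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;
	/* ... program ring_dma into the (hypothetical) device ... */
	return 0;
}

static void teardown_ring(struct device *dev)
{
	dma_free_coherent(dev, PAGE_SIZE, ring, ring_dma);
}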
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 96e3ea6b17c7..ba243a4cc119 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
13 13
14#include <stdarg.h> 14#include <stdarg.h>
15 15
16#include <linux/cpu.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/fs.h> 19#include <linux/fs.h>
@@ -37,6 +38,7 @@
37#include <linux/kallsyms.h> 38#include <linux/kallsyms.h>
38#include <linux/ptrace.h> 39#include <linux/ptrace.h>
39#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/kprobes.h>
40 42
41#include <asm/uaccess.h> 43#include <asm/uaccess.h>
42#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -54,6 +56,9 @@
54#include <linux/irq.h> 56#include <linux/irq.h>
55#include <linux/err.h> 57#include <linux/err.h>
56 58
59#include <asm/tlbflush.h>
60#include <asm/cpu.h>
61
57asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
58 63
59static int hlt_counter; 64static int hlt_counter;
@@ -73,6 +78,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
73 * Powermanagement idle function, if any.. 78 * Powermanagement idle function, if any..
74 */ 79 */
75void (*pm_idle)(void); 80void (*pm_idle)(void);
81EXPORT_SYMBOL(pm_idle);
76static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 82static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
77 83
78void disable_hlt(void) 84void disable_hlt(void)
@@ -105,6 +111,9 @@ void default_idle(void)
105 cpu_relax(); 111 cpu_relax();
106 } 112 }
107} 113}
114#ifdef CONFIG_APM_MODULE
115EXPORT_SYMBOL(default_idle);
116#endif
108 117
109/* 118/*
110 * On SMP it's slightly faster (but much more power-consuming!) 119 * On SMP it's slightly faster (but much more power-consuming!)
@@ -138,14 +147,42 @@ static void poll_idle (void)
138 } 147 }
139} 148}
140 149
150#ifdef CONFIG_HOTPLUG_CPU
151#include <asm/nmi.h>
152/* We don't actually take the CPU down, just spin without interrupts. */
153static inline void play_dead(void)
154{
155 /* This must be done before dead CPU ack */
156 cpu_exit_clear();
157 wbinvd();
158 mb();
159 /* Ack it */
160 __get_cpu_var(cpu_state) = CPU_DEAD;
161
162 /*
163 * With physical CPU hotplug, we should halt the cpu
164 */
165 local_irq_disable();
166 while (1)
167 __asm__ __volatile__("hlt":::"memory");
168}
169#else
170static inline void play_dead(void)
171{
172 BUG();
173}
174#endif /* CONFIG_HOTPLUG_CPU */
175
141/* 176/*
142 * The idle thread. There's no useful work to be 177 * The idle thread. There's no useful work to be
143 * done, so just try to conserve power and have a 178 * done, so just try to conserve power and have a
144 * low exit latency (ie sit in a loop waiting for 179 * low exit latency (ie sit in a loop waiting for
145 * somebody to say that they'd like to reschedule) 180 * somebody to say that they'd like to reschedule)
146 */ 181 */
147void cpu_idle (void) 182void cpu_idle(void)
148{ 183{
184 int cpu = raw_smp_processor_id();
185
149 /* endless idle loop with no priority at all */ 186 /* endless idle loop with no priority at all */
150 while (1) { 187 while (1) {
151 while (!need_resched()) { 188 while (!need_resched()) {
@@ -160,6 +197,9 @@ void cpu_idle (void)
160 if (!idle) 197 if (!idle)
161 idle = default_idle; 198 idle = default_idle;
162 199
200 if (cpu_is_offline(cpu))
201 play_dead();
202
163 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 203 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
164 idle(); 204 idle();
165 } 205 }
@@ -218,7 +258,7 @@ static void mwait_idle(void)
218 } 258 }
219} 259}
220 260
221void __init select_idle_routine(const struct cpuinfo_x86 *c) 261void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
222{ 262{
223 if (cpu_has(c, X86_FEATURE_MWAIT)) { 263 if (cpu_has(c, X86_FEATURE_MWAIT)) {
224 printk("monitor/mwait feature present.\n"); 264 printk("monitor/mwait feature present.\n");
@@ -262,7 +302,7 @@ void show_regs(struct pt_regs * regs)
262 printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); 302 printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
263 print_symbol("EIP is at %s\n", regs->eip); 303 print_symbol("EIP is at %s\n", regs->eip);
264 304
265 if (regs->xcs & 3) 305 if (user_mode(regs))
266 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); 306 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
267 printk(" EFLAGS: %08lx %s (%s)\n", 307 printk(" EFLAGS: %08lx %s (%s)\n",
268 regs->eflags, print_tainted(), system_utsname.release); 308 regs->eflags, print_tainted(), system_utsname.release);
@@ -325,6 +365,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
325 /* Ok, create the new process.. */ 365 /* Ok, create the new process.. */
326 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); 366 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
327} 367}
368EXPORT_SYMBOL(kernel_thread);
328 369
329/* 370/*
330 * Free current thread data structures etc.. 371 * Free current thread data structures etc..
@@ -334,6 +375,13 @@ void exit_thread(void)
334 struct task_struct *tsk = current; 375 struct task_struct *tsk = current;
335 struct thread_struct *t = &tsk->thread; 376 struct thread_struct *t = &tsk->thread;
336 377
378 /*
379 * Remove function-return probe instances associated with this task
380 * and put them back on the free list. Do not insert an exit probe for
381 * this function; it would be disabled by kprobe_flush_task if you did.
382 */
383 kprobe_flush_task(tsk);
384
337 /* The process may have allocated an io port bitmap... nuke it. */ 385 /* The process may have allocated an io port bitmap... nuke it. */
338 if (unlikely(NULL != t->io_bitmap_ptr)) { 386 if (unlikely(NULL != t->io_bitmap_ptr)) {
339 int cpu = get_cpu(); 387 int cpu = get_cpu();
@@ -357,6 +405,13 @@ void flush_thread(void)
357{ 405{
358 struct task_struct *tsk = current; 406 struct task_struct *tsk = current;
359 407
408 /*
409 * Remove function-return probe instances associated with this task
410 * and put them back on the free list. Do not insert an exit probe for
411 * this function; it would be disabled by kprobe_flush_task if you did.
412 */
413 kprobe_flush_task(tsk);
414
360 memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); 415 memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
361 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 416 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
362 /* 417 /*
@@ -508,6 +563,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
508 563
509 dump->u_fpvalid = dump_fpu (regs, &dump->i387); 564 dump->u_fpvalid = dump_fpu (regs, &dump->i387);
510} 565}
566EXPORT_SYMBOL(dump_thread);
511 567
512/* 568/*
513 * Capture the user space registers if the task is not running (in user space) 569 * Capture the user space registers if the task is not running (in user space)
@@ -561,6 +617,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
561} 617}
562 618
563/* 619/*
620 * This function decides whether the context switch from prev to next
621 * has to toggle the TSC-disable bit in cr4.
622 */
623static inline void disable_tsc(struct task_struct *prev_p,
624 struct task_struct *next_p)
625{
626 struct thread_info *prev, *next;
627
628 /*
629 * gcc should eliminate the ->thread_info dereference if
630 * has_secure_computing returns 0 at compile time (SECCOMP=n).
631 */
632 prev = prev_p->thread_info;
633 next = next_p->thread_info;
634
635 if (has_secure_computing(prev) || has_secure_computing(next)) {
636 /* slow path here */
637 if (has_secure_computing(prev) &&
638 !has_secure_computing(next)) {
639 write_cr4(read_cr4() & ~X86_CR4_TSD);
640 } else if (!has_secure_computing(prev) &&
641 has_secure_computing(next))
642 write_cr4(read_cr4() | X86_CR4_TSD);
643 }
644}
645
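The visible effect on a seccomp task: once CR4.TSD is set, executing rdtsc at CPL 3 raises a general protection fault, which the kernel delivers as SIGSEGV. An illustrative userspace snippet of the instruction being fenced off (a sketch, not part of the patch):

/* Faults with SIGSEGV while CR4.TSD is set for this task. */
static inline unsigned long long rdtsc(void)
{
	unsigned long long t;
	asm volatile("rdtsc" : "=A" (t));	/* edx:eax pair on i386 */
	return t;
}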
646/*
564 * switch_to(x,y) should switch tasks from x to y. 647 * switch_to(x,y) should switch tasks from x to y.
565 * 648 *
566 * We fsave/fwait so that an exception goes off at the right time 649 * We fsave/fwait so that an exception goes off at the right time
@@ -627,18 +710,20 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
627 * Now maybe reload the debug registers 710 * Now maybe reload the debug registers
628 */ 711 */
629 if (unlikely(next->debugreg[7])) { 712 if (unlikely(next->debugreg[7])) {
630 loaddebug(next, 0); 713 set_debugreg(current->thread.debugreg[0], 0);
631 loaddebug(next, 1); 714 set_debugreg(current->thread.debugreg[1], 1);
632 loaddebug(next, 2); 715 set_debugreg(current->thread.debugreg[2], 2);
633 loaddebug(next, 3); 716 set_debugreg(current->thread.debugreg[3], 3);
634 /* no 4 and 5 */ 717 /* no 4 and 5 */
635 loaddebug(next, 6); 718 set_debugreg(current->thread.debugreg[6], 6);
636 loaddebug(next, 7); 719 set_debugreg(current->thread.debugreg[7], 7);
637 } 720 }
638 721
639 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) 722 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
640 handle_io_bitmap(next, tss); 723 handle_io_bitmap(next, tss);
641 724
725 disable_tsc(prev_p, next_p);
726
642 return prev_p; 727 return prev_p;
643} 728}
644 729
@@ -731,6 +816,7 @@ unsigned long get_wchan(struct task_struct *p)
731 } while (count++ < 16); 816 } while (count++ < 16);
732 return 0; 817 return 0;
733} 818}
819EXPORT_SYMBOL(get_wchan);
734 820
735/* 821/*
736 * sys_alloc_thread_area: get a yet unused TLS descriptor index. 822 * sys_alloc_thread_area: get a yet unused TLS descriptor index.
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index e34f651fa13c..0da59b42843c 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
668 info.si_code = TRAP_BRKPT; 668 info.si_code = TRAP_BRKPT;
669 669
670 /* User-mode eip? */ 670 /* User-mode eip? */
671 info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; 671 info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
672 672
673 /* Send us the fakey SIGTRAP */ 673 /* Send us the fakey SIGTRAP */
674 force_sig_info(SIGTRAP, &info, tsk); 674 force_sig_info(SIGTRAP, &info, tsk);
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 6dc27eb70ee7..b3e584849961 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -2,6 +2,7 @@
2 * linux/arch/i386/kernel/reboot.c 2 * linux/arch/i386/kernel/reboot.c
3 */ 3 */
4 4
5#include <linux/config.h>
5#include <linux/mm.h> 6#include <linux/mm.h>
6#include <linux/module.h> 7#include <linux/module.h>
7#include <linux/delay.h> 8#include <linux/delay.h>
@@ -19,12 +20,12 @@
19 * Power off function, if any 20 * Power off function, if any
20 */ 21 */
21void (*pm_power_off)(void); 22void (*pm_power_off)(void);
23EXPORT_SYMBOL(pm_power_off);
22 24
23static int reboot_mode; 25static int reboot_mode;
24static int reboot_thru_bios; 26static int reboot_thru_bios;
25 27
26#ifdef CONFIG_SMP 28#ifdef CONFIG_SMP
27int reboot_smp = 0;
28static int reboot_cpu = -1; 29static int reboot_cpu = -1;
29/* shamelessly grabbed from lib/vsprintf.c for readability */ 30/* shamelessly grabbed from lib/vsprintf.c for readability */
30#define is_digit(c) ((c) >= '0' && (c) <= '9') 31#define is_digit(c) ((c) >= '0' && (c) <= '9')
@@ -47,7 +48,6 @@ static int __init reboot_setup(char *str)
47 break; 48 break;
48#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
49 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ 50 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
50 reboot_smp = 1;
51 if (is_digit(*(str+1))) { 51 if (is_digit(*(str+1))) {
52 reboot_cpu = (int) (*(str+1) - '0'); 52 reboot_cpu = (int) (*(str+1) - '0');
53 if (is_digit(*(str+2))) 53 if (is_digit(*(str+2)))
@@ -86,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
86 return 0; 86 return 0;
87} 87}
88 88
89/*
90 * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
91 */
92static int __init set_smp_reboot(struct dmi_system_id *d)
93{
94#ifdef CONFIG_SMP
95 if (!reboot_smp) {
96 reboot_smp = 1;
97 printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
98 }
99#endif
100 return 0;
101}
102
103/*
104 * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
105 */
106static int __init set_smp_bios_reboot(struct dmi_system_id *d)
107{
108 set_smp_reboot(d);
109 set_bios_reboot(d);
110 return 0;
111}
112
113static struct dmi_system_id __initdata reboot_dmi_table[] = { 89static struct dmi_system_id __initdata reboot_dmi_table[] = {
114 { /* Handle problems with rebooting on Dell 1300's */ 90 { /* Handle problems with rebooting on Dell 1300's */
115 .callback = set_smp_bios_reboot, 91 .callback = set_bios_reboot,
116 .ident = "Dell PowerEdge 1300", 92 .ident = "Dell PowerEdge 1300",
117 .matches = { 93 .matches = {
118 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), 94 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -295,42 +271,36 @@ void machine_real_restart(unsigned char *code, int length)
295 : 271 :
296 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); 272 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
297} 273}
274#ifdef CONFIG_APM_MODULE
275EXPORT_SYMBOL(machine_real_restart);
276#endif
298 277
299void machine_restart(char * __unused) 278void machine_shutdown(void)
300{ 279{
301#ifdef CONFIG_SMP 280#ifdef CONFIG_SMP
302 int cpuid; 281 int reboot_cpu_id;
303 282
304 cpuid = GET_APIC_ID(apic_read(APIC_ID)); 283 /* The boot cpu is always logical cpu 0 */
305 284 reboot_cpu_id = 0;
306 if (reboot_smp) { 285
307 286 /* See if a command line override has been given */
308 /* check to see if reboot_cpu is valid 287 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
309 if its not, default to the BSP */ 288 cpu_isset(reboot_cpu, cpu_online_map)) {
310 if ((reboot_cpu == -1) || 289 reboot_cpu_id = reboot_cpu;
311 (reboot_cpu > (NR_CPUS -1)) ||
312 !physid_isset(cpuid, phys_cpu_present_map))
313 reboot_cpu = boot_cpu_physical_apicid;
314
315 reboot_smp = 0; /* use this as a flag to only go through this once*/
316 /* re-run this function on the other CPUs
317 it will fall though this section since we have
318 cleared reboot_smp, and do the reboot if it is the
319 correct CPU, otherwise it halts. */
320 if (reboot_cpu != cpuid)
321 smp_call_function((void *)machine_restart , NULL, 1, 0);
322 } 290 }
323 291
324 /* if reboot_cpu is still -1, then we want a tradional reboot, 292 /* Make certain the cpu I'm rebooting on is online */
325 and if we are not running on the reboot_cpu,, halt */ 293 if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
326 if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) { 294 reboot_cpu_id = smp_processor_id();
327 for (;;)
328 __asm__ __volatile__ ("hlt");
329 } 295 }
330 /* 296
331 * Stop all CPUs and turn off local APICs and the IO-APIC, so 297 /* Make certain I only run on the appropriate processor */
332 * other OSs see a clean IRQ state. 298 set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
299
300 /* O.K. Now that I'm on the appropriate processor, stop
301 * all of the others, and disable their local APICs.
333 */ 302 */
303
334 smp_send_stop(); 304 smp_send_stop();
335#endif /* CONFIG_SMP */ 305#endif /* CONFIG_SMP */
336 306
@@ -339,6 +309,11 @@ void machine_restart(char * __unused)
339#ifdef CONFIG_X86_IO_APIC 309#ifdef CONFIG_X86_IO_APIC
340 disable_IO_APIC(); 310 disable_IO_APIC();
341#endif 311#endif
312}
313
314void machine_restart(char * __unused)
315{
316 machine_shutdown();
342 317
343 if (!reboot_thru_bios) { 318 if (!reboot_thru_bios) {
344 if (efi_enabled) { 319 if (efi_enabled) {
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d312616effa1
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
1/*
2 * relocate_kernel.S - put the kernel image in place to boot
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 *
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
7 */
8
9#include <linux/linkage.h>
10
11 /*
12 * Must be relocatable PIC code callable as a C function that, once
13 * it starts, cannot use the previous process's stack.
14 */
15 .globl relocate_new_kernel
16relocate_new_kernel:
17 /* read the arguments and say goodbye to the stack */
18 movl 4(%esp), %ebx /* page_list */
19 movl 8(%esp), %ebp /* reboot_code_buffer */
20 movl 12(%esp), %edx /* start address */
21 movl 16(%esp), %ecx /* cpu_has_pae */
22
23 /* zero out flags, and disable interrupts */
24 pushl $0
25 popfl
26
27 /* set a new stack at the bottom of our page... */
28 lea 4096(%ebp), %esp
29
30 /* store the parameters back on the stack */
31 pushl %edx /* store the start address */
32
33 /* Set cr0 to a known state:
34 * 31 0 == Paging disabled
35 * 18 0 == Alignment check disabled
36 * 16 0 == Write protect disabled
37 * 3 0 == No task switch
38 * 2 0 == Don't do FP software emulation.
39 * 0 1 == Protected mode enabled
40 */
41 movl %cr0, %eax
42 andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
43 orl $(1<<0), %eax
44 movl %eax, %cr0
45
46 /* clear cr4 if applicable */
47 testl %ecx, %ecx
48 jz 1f
49 /* Set cr4 to a known state:
50 * Setting everything to zero seems safe.
51 */
52 movl %cr4, %eax
53 andl $0, %eax
54 movl %eax, %cr4
55
56 jmp 1f
571:
58
59 /* Flush the TLB (needed?) */
60 xorl %eax, %eax
61 movl %eax, %cr3
62
63 /* Do the copies */
64 movl %ebx, %ecx
65 jmp 1f
66
670: /* top, read another word from the indirection page */
68 movl (%ebx), %ecx
69 addl $4, %ebx
701:
71 testl $0x1, %ecx /* is it a destination page */
72 jz 2f
73 movl %ecx, %edi
74 andl $0xfffff000, %edi
75 jmp 0b
762:
77 testl $0x2, %ecx /* is it an indirection page */
78 jz 2f
79 movl %ecx, %ebx
80 andl $0xfffff000, %ebx
81 jmp 0b
822:
83 testl $0x4, %ecx /* is it the done indicator */
84 jz 2f
85 jmp 3f
862:
87 testl $0x8, %ecx /* is it the source indicator */
88 jz 0b /* Ignore it otherwise */
89 movl %ecx, %esi /* For every source page do a copy */
90 andl $0xfffff000, %esi
91
92 movl $1024, %ecx
93 rep ; movsl
94 jmp 0b
95
963:
97
98 /* To be certain of avoiding problems with self-modifying code
99 * I need to execute a serializing instruction here.
100 * So I flush the TLB, it's handy, and not processor dependent.
101 */
102 xorl %eax, %eax
103 movl %eax, %cr3
104
105 /* set all of the registers to known values */
106 /* leave %esp alone */
107
108 xorl %eax, %eax
109 xorl %ebx, %ebx
110 xorl %ecx, %ecx
111 xorl %edx, %edx
112 xorl %esi, %esi
113 xorl %edi, %edi
114 xorl %ebp, %ebp
115 ret
116relocate_new_kernel_end:
117
118 .globl relocate_new_kernel_size
119relocate_new_kernel_size:
120 .long relocate_new_kernel_end - relocate_new_kernel
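The page walk above, rendered in C as a sketch (the flag values mirror the bit tests in the assembly; the IND_* names are illustrative, and memcpy stands in for the rep;movsl):

#include <string.h>

#define IND_DESTINATION 0x1	/* entry sets the copy destination    */
#define IND_INDIRECTION 0x2	/* entry chains to the next list page */
#define IND_DONE        0x4	/* entry terminates the walk          */
#define IND_SOURCE      0x8	/* entry names one 4 KB page to copy  */

static void walk_indirection_page(unsigned long *entry)
{
	char *dest = 0;

	for (;;) {
		unsigned long e = *entry++;
		void *page = (void *)(e & 0xfffff000);	/* strip flag bits */

		if (e & IND_DESTINATION)
			dest = page;
		else if (e & IND_INDIRECTION)
			entry = page;		/* follow the chain        */
		else if (e & IND_DONE)
			return;
		else if (e & IND_SOURCE) {
			memcpy(dest, page, 4096);
			dest += 4096;		/* movsl advances %edi too */
		}
		/* any other entry is ignored, as in the assembly */
	}
}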
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 2bfbddebdbf8..7306353c520e 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -23,8 +23,10 @@
23 * This file handles the architecture-dependent parts of initialization 23 * This file handles the architecture-dependent parts of initialization
24 */ 24 */
25 25
26#include <linux/config.h>
26#include <linux/sched.h> 27#include <linux/sched.h>
27#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/mmzone.h>
28#include <linux/tty.h> 30#include <linux/tty.h>
29#include <linux/ioport.h> 31#include <linux/ioport.h>
30#include <linux/acpi.h> 32#include <linux/acpi.h>
@@ -41,7 +43,12 @@
41#include <linux/init.h> 43#include <linux/init.h>
42#include <linux/edd.h> 44#include <linux/edd.h>
43#include <linux/nodemask.h> 45#include <linux/nodemask.h>
46#include <linux/kexec.h>
47#include <linux/crash_dump.h>
48
44#include <video/edid.h> 49#include <video/edid.h>
50
51#include <asm/apic.h>
45#include <asm/e820.h> 52#include <asm/e820.h>
46#include <asm/mpspec.h> 53#include <asm/mpspec.h>
47#include <asm/setup.h> 54#include <asm/setup.h>
@@ -53,12 +60,15 @@
53#include "setup_arch_pre.h" 60#include "setup_arch_pre.h"
54#include <bios_ebda.h> 61#include <bios_ebda.h>
55 62
63/* Forward Declaration. */
64void __init find_max_pfn(void);
65
56/* This value is set up by the early boot code to point to the value 66/* This value is set up by the early boot code to point to the value
57 immediately after the boot time page tables. It contains a *physical* 67 immediately after the boot time page tables. It contains a *physical*
58 address, and must not be in the .bss segment! */ 68 address, and must not be in the .bss segment! */
59unsigned long init_pg_tables_end __initdata = ~0UL; 69unsigned long init_pg_tables_end __initdata = ~0UL;
60 70
61int disable_pse __initdata = 0; 71int disable_pse __devinitdata = 0;
62 72
63/* 73/*
64 * Machine setup.. 74 * Machine setup..
@@ -73,6 +83,7 @@ EXPORT_SYMBOL(efi_enabled);
73struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 83struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74/* common cpu data for all cpus */ 84/* common cpu data for all cpus */
75struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 85struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86EXPORT_SYMBOL(boot_cpu_data);
76 87
77unsigned long mmu_cr4_features; 88unsigned long mmu_cr4_features;
78 89
@@ -90,12 +101,18 @@ extern acpi_interrupt_flags acpi_sci_flags;
90 101
91/* for MCA, but anyone else can use it if they want */ 102/* for MCA, but anyone else can use it if they want */
92unsigned int machine_id; 103unsigned int machine_id;
104#ifdef CONFIG_MCA
105EXPORT_SYMBOL(machine_id);
106#endif
93unsigned int machine_submodel_id; 107unsigned int machine_submodel_id;
94unsigned int BIOS_revision; 108unsigned int BIOS_revision;
95unsigned int mca_pentium_flag; 109unsigned int mca_pentium_flag;
96 110
97/* For PCI or other memory-mapped resources */ 111/* For PCI or other memory-mapped resources */
98unsigned long pci_mem_start = 0x10000000; 112unsigned long pci_mem_start = 0x10000000;
113#ifdef CONFIG_PCI
114EXPORT_SYMBOL(pci_mem_start);
115#endif
99 116
100/* Boot loader ID as an integer, for the benefit of proc_dointvec */ 117/* Boot loader ID as an integer, for the benefit of proc_dointvec */
101int bootloader_type; 118int bootloader_type;
@@ -107,14 +124,26 @@ static unsigned int highmem_pages = -1;
107 * Setup options 124 * Setup options
108 */ 125 */
109struct drive_info_struct { char dummy[32]; } drive_info; 126struct drive_info_struct { char dummy[32]; } drive_info;
127#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
128 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
129EXPORT_SYMBOL(drive_info);
130#endif
110struct screen_info screen_info; 131struct screen_info screen_info;
132#ifdef CONFIG_VT
133EXPORT_SYMBOL(screen_info);
134#endif
111struct apm_info apm_info; 135struct apm_info apm_info;
136EXPORT_SYMBOL(apm_info);
112struct sys_desc_table_struct { 137struct sys_desc_table_struct {
113 unsigned short length; 138 unsigned short length;
114 unsigned char table[0]; 139 unsigned char table[0];
115}; 140};
116struct edid_info edid_info; 141struct edid_info edid_info;
117struct ist_info ist_info; 142struct ist_info ist_info;
143#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145EXPORT_SYMBOL(ist_info);
146#endif
118struct e820map e820; 147struct e820map e820;
119 148
120extern void early_cpu_init(void); 149extern void early_cpu_init(void);
@@ -711,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p)
711 if (to != command_line) 740 if (to != command_line)
712 to--; 741 to--;
713 if (!memcmp(from+7, "exactmap", 8)) { 742 if (!memcmp(from+7, "exactmap", 8)) {
743#ifdef CONFIG_CRASH_DUMP
744 /* If we are doing a crash dump, we
745 * still need to know the real mem
746 * size before the original memory map is
747 * reset.
748 */
749 find_max_pfn();
750 saved_max_pfn = max_pfn;
751#endif
714 from += 8+7; 752 from += 8+7;
715 e820.nr_map = 0; 753 e820.nr_map = 0;
716 userdef = 1; 754 userdef = 1;
@@ -814,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p)
814#endif /* CONFIG_X86_LOCAL_APIC */ 852#endif /* CONFIG_X86_LOCAL_APIC */
815#endif /* CONFIG_ACPI_BOOT */ 853#endif /* CONFIG_ACPI_BOOT */
816 854
855#ifdef CONFIG_X86_LOCAL_APIC
856 /* enable local APIC */
857 else if (!memcmp(from, "lapic", 5))
858 lapic_enable();
859
860 /* disable local APIC */
861 else if (!memcmp(from, "nolapic", 7))
862 lapic_disable();
863#endif /* CONFIG_X86_LOCAL_APIC */
864
865#ifdef CONFIG_KEXEC
866 /* crashkernel=size@addr specifies the location to reserve for
867 * a crash kernel. By reserving this memory we guarantee
868 * that linux never sets it up as a DMA target.
869 * Useful for holding code to do something appropriate
870 * after a kernel panic.
871 */
872 else if (!memcmp(from, "crashkernel=", 12)) {
873 unsigned long size, base;
874 size = memparse(from+12, &from);
875 if (*from == '@') {
876 base = memparse(from+1, &from);
877 /* FIXME: Do I want a sanity check
878 * to validate the memory range?
879 */
880 crashk_res.start = base;
881 crashk_res.end = base + size - 1;
882 }
883 }
884#endif
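A worked example of the parsing above, with illustrative values: booting with crashkernel=64M@16M makes memparse() return size = 0x4000000 and base = 0x1000000, so crashk_res spans 0x01000000-0x04ffffff; setup_bootmem_allocator() below then reserves exactly that range, and legacy_init_iomem_resources() publishes it as a resource.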
885#ifdef CONFIG_CRASH_DUMP
886 /* elfcorehdr= specifies the location of the elf core header
887 * stored by the crashed kernel.
888 */
889 else if (!memcmp(from, "elfcorehdr=", 11))
890 elfcorehdr_addr = memparse(from+11, &from);
891#endif
892
817 /* 893 /*
818 * highmem=size forces highmem to be exactly 'size' bytes. 894 * highmem=size forces highmem to be exactly 'size' bytes.
819 * This works even on boxes that have no highmem otherwise. 895 * This works even on boxes that have no highmem otherwise.
@@ -1022,7 +1098,7 @@ static void __init reserve_ebda_region(void)
1022 reserve_bootmem(addr, PAGE_SIZE); 1098 reserve_bootmem(addr, PAGE_SIZE);
1023} 1099}
1024 1100
1025#ifndef CONFIG_DISCONTIGMEM 1101#ifndef CONFIG_NEED_MULTIPLE_NODES
1026void __init setup_bootmem_allocator(void); 1102void __init setup_bootmem_allocator(void);
1027static unsigned long __init setup_memory(void) 1103static unsigned long __init setup_memory(void)
1028{ 1104{
@@ -1072,9 +1148,9 @@ void __init zone_sizes_init(void)
1072 free_area_init(zones_size); 1148 free_area_init(zones_size);
1073} 1149}
1074#else 1150#else
1075extern unsigned long setup_memory(void); 1151extern unsigned long __init setup_memory(void);
1076extern void zone_sizes_init(void); 1152extern void zone_sizes_init(void);
1077#endif /* !CONFIG_DISCONTIGMEM */ 1153#endif /* !CONFIG_NEED_MULTIPLE_NODES */
1078 1154
1079void __init setup_bootmem_allocator(void) 1155void __init setup_bootmem_allocator(void)
1080{ 1156{
@@ -1092,8 +1168,8 @@ void __init setup_bootmem_allocator(void)
1092 * the (very unlikely) case of us accidentally initializing the 1168 * the (very unlikely) case of us accidentally initializing the
1093 * bootmem allocator with an invalid RAM area. 1169 * bootmem allocator with an invalid RAM area.
1094 */ 1170 */
1095 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + 1171 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1096 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); 1172 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1097 1173
1098 /* 1174 /*
1099 * reserve physical page 0 - it's a special BIOS page on many boxes, 1175 * reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1149,6 +1225,11 @@ void __init setup_bootmem_allocator(void)
1149 } 1225 }
1150 } 1226 }
1151#endif 1227#endif
1228#ifdef CONFIG_KEXEC
1229 if (crashk_res.start != crashk_res.end)
1230 reserve_bootmem(crashk_res.start,
1231 crashk_res.end - crashk_res.start + 1);
1232#endif
1152} 1233}
1153 1234
1154/* 1235/*
@@ -1202,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
1202 */ 1283 */
1203 request_resource(res, code_resource); 1284 request_resource(res, code_resource);
1204 request_resource(res, data_resource); 1285 request_resource(res, data_resource);
1286#ifdef CONFIG_KEXEC
1287 request_resource(res, &crashk_res);
1288#endif
1205 } 1289 }
1206 } 1290 }
1207} 1291}
@@ -1475,6 +1559,7 @@ void __init setup_arch(char **cmdline_p)
1475#endif 1559#endif
1476 paging_init(); 1560 paging_init();
1477 remapped_pgdat_init(); 1561 remapped_pgdat_init();
1562 sparse_init();
1478 zone_sizes_init(); 1563 zone_sizes_init();
1479 1564
1480 /* 1565 /*
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index ea46d028af08..89ef7adc63a4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
346extern void __user __kernel_sigreturn; 346extern void __user __kernel_sigreturn;
347extern void __user __kernel_rt_sigreturn; 347extern void __user __kernel_rt_sigreturn;
348 348
349static void setup_frame(int sig, struct k_sigaction *ka, 349static int setup_frame(int sig, struct k_sigaction *ka,
350 sigset_t *set, struct pt_regs * regs) 350 sigset_t *set, struct pt_regs * regs)
351{ 351{
352 void __user *restorer; 352 void __user *restorer;
353 struct sigframe __user *frame; 353 struct sigframe __user *frame;
@@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka,
429 current->comm, current->pid, frame, regs->eip, frame->pretcode); 429 current->comm, current->pid, frame, regs->eip, frame->pretcode);
430#endif 430#endif
431 431
432 return; 432 return 1;
433 433
434give_sigsegv: 434give_sigsegv:
435 force_sigsegv(sig, current); 435 force_sigsegv(sig, current);
436 return 0;
436} 437}
437 438
438static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 439static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
439 sigset_t *set, struct pt_regs * regs) 440 sigset_t *set, struct pt_regs * regs)
440{ 441{
441 void __user *restorer; 442 void __user *restorer;
@@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
522 current->comm, current->pid, frame, regs->eip, frame->pretcode); 523 current->comm, current->pid, frame, regs->eip, frame->pretcode);
523#endif 524#endif
524 525
525 return; 526 return 1;
526 527
527give_sigsegv: 528give_sigsegv:
528 force_sigsegv(sig, current); 529 force_sigsegv(sig, current);
530 return 0;
529} 531}
530 532
531/* 533/*
532 * OK, we're invoking a handler 534 * OK, we're invoking a handler
533 */ 535 */
534 536
535static void 537static int
536handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 538handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
537 sigset_t *oldset, struct pt_regs * regs) 539 sigset_t *oldset, struct pt_regs * regs)
538{ 540{
541 int ret;
542
539 /* Are we from a system call? */ 543 /* Are we from a system call? */
540 if (regs->orig_eax >= 0) { 544 if (regs->orig_eax >= 0) {
541 /* If so, check system call restarting.. */ 545 /* If so, check system call restarting.. */
@@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
569 573
570 /* Set up the stack frame */ 574 /* Set up the stack frame */
571 if (ka->sa.sa_flags & SA_SIGINFO) 575 if (ka->sa.sa_flags & SA_SIGINFO)
572 setup_rt_frame(sig, ka, info, oldset, regs); 576 ret = setup_rt_frame(sig, ka, info, oldset, regs);
573 else 577 else
574 setup_frame(sig, ka, oldset, regs); 578 ret = setup_frame(sig, ka, oldset, regs);
575 579
576 if (!(ka->sa.sa_flags & SA_NODEFER)) { 580 if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
577 spin_lock_irq(&current->sighand->siglock); 581 spin_lock_irq(&current->sighand->siglock);
578 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 582 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
579 sigaddset(&current->blocked,sig); 583 sigaddset(&current->blocked,sig);
580 recalc_sigpending(); 584 recalc_sigpending();
581 spin_unlock_irq(&current->sighand->siglock); 585 spin_unlock_irq(&current->sighand->siglock);
582 } 586 }
587
588 return ret;
583} 589}
584 590
585/* 591/*
@@ -599,13 +605,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
599 * kernel mode. Just return without doing anything 605 * kernel mode. Just return without doing anything
600 * if so. 606 * if so.
601 */ 607 */
602 if ((regs->xcs & 3) != 3) 608 if (!user_mode(regs))
603 return 1; 609 return 1;
604 610
605 if (current->flags & PF_FREEZE) { 611 if (try_to_freeze())
606 refrigerator(0);
607 goto no_signal; 612 goto no_signal;
608 }
609 613
610 if (!oldset) 614 if (!oldset)
611 oldset = &current->blocked; 615 oldset = &current->blocked;
@@ -618,12 +622,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
618 * inside the kernel. 622 * inside the kernel.
619 */ 623 */
620 if (unlikely(current->thread.debugreg[7])) { 624 if (unlikely(current->thread.debugreg[7])) {
621 loaddebug(&current->thread, 7); 625 set_debugreg(current->thread.debugreg[7], 7);
622 } 626 }
623 627
624 /* Whee! Actually deliver the signal. */ 628 /* Whee! Actually deliver the signal. */
625 handle_signal(signr, &info, &ka, oldset, regs); 629 return handle_signal(signr, &info, &ka, oldset, regs);
626 return 1;
627 } 630 }
628 631
629 no_signal: 632 no_signal:
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 6223c33ac91c..cec4bde67161 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,8 @@
19#include <linux/mc146818rtc.h> 19#include <linux/mc146818rtc.h>
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/cpu.h>
23#include <linux/module.h>
22 24
23#include <asm/mtrr.h> 25#include <asm/mtrr.h>
24#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
@@ -163,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
163 unsigned long flags; 165 unsigned long flags;
164 166
165 local_irq_save(flags); 167 local_irq_save(flags);
166 168 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
167 /* 169 /*
168 * Wait for idle. 170 * Wait for idle.
169 */ 171 */
@@ -345,21 +347,21 @@ out:
345static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 347static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
346 unsigned long va) 348 unsigned long va)
347{ 349{
348 cpumask_t tmp;
349 /* 350 /*
350 * A couple of (to be removed) sanity checks: 351 * A couple of (to be removed) sanity checks:
351 * 352 *
352 * - we do not send IPIs to not-yet booted CPUs.
353 * - current CPU must not be in mask 353 * - current CPU must not be in mask
354 * - mask must exist :) 354 * - mask must exist :)
355 */ 355 */
356 BUG_ON(cpus_empty(cpumask)); 356 BUG_ON(cpus_empty(cpumask));
357
358 cpus_and(tmp, cpumask, cpu_online_map);
359 BUG_ON(!cpus_equal(cpumask, tmp));
360 BUG_ON(cpu_isset(smp_processor_id(), cpumask)); 357 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
361 BUG_ON(!mm); 358 BUG_ON(!mm);
362 359
360 /* If a CPU which we ran on has gone down, OK. */
361 cpus_and(cpumask, cpumask, cpu_online_map);
362 if (cpus_empty(cpumask))
363 return;
364
363 /* 365 /*
 364 * I'm not happy about this global shared spinlock in the 366
365 * MM hot path, but we'll see how contended it is. 367 * MM hot path, but we'll see how contended it is.
@@ -452,6 +454,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
452 454
453 preempt_enable(); 455 preempt_enable();
454} 456}
457EXPORT_SYMBOL(flush_tlb_page);
455 458
456static void do_flush_tlb_all(void* info) 459static void do_flush_tlb_all(void* info)
457{ 460{
@@ -474,6 +477,7 @@ void flush_tlb_all(void)
474 */ 477 */
475void smp_send_reschedule(int cpu) 478void smp_send_reschedule(int cpu)
476{ 479{
480 WARN_ON(cpu_is_offline(cpu));
477 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 481 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
478} 482}
479 483
@@ -491,6 +495,16 @@ struct call_data_struct {
491 int wait; 495 int wait;
492}; 496};
493 497
498void lock_ipi_call_lock(void)
499{
500 spin_lock_irq(&call_lock);
501}
502
503void unlock_ipi_call_lock(void)
504{
505 spin_unlock_irq(&call_lock);
506}
507
494static struct call_data_struct * call_data; 508static struct call_data_struct * call_data;
495 509
496/* 510/*
@@ -514,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
514 */ 528 */
515{ 529{
516 struct call_data_struct data; 530 struct call_data_struct data;
517 int cpus = num_online_cpus()-1; 531 int cpus;
518 532
519 if (!cpus) 533 /* Holding any lock stops cpus from going down. */
534 spin_lock(&call_lock);
535 cpus = num_online_cpus() - 1;
536 if (!cpus) {
537 spin_unlock(&call_lock);
520 return 0; 538 return 0;
539 }
521 540
522 /* Can deadlock when called with interrupts disabled */ 541 /* Can deadlock when called with interrupts disabled */
523 WARN_ON(irqs_disabled()); 542 WARN_ON(irqs_disabled());
@@ -529,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
529 if (wait) 548 if (wait)
530 atomic_set(&data.finished, 0); 549 atomic_set(&data.finished, 0);
531 550
532 spin_lock(&call_lock);
533 call_data = &data; 551 call_data = &data;
534 mb(); 552 mb();
535 553
@@ -547,6 +565,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
547 565
548 return 0; 566 return 0;
549} 567}
568EXPORT_SYMBOL(smp_call_function);
550 569
551static void stop_this_cpu (void * dummy) 570static void stop_this_cpu (void * dummy)
552{ 571{
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bc1bb6919e6a..d66bf489a2e9 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
44#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
45#include <linux/irq.h> 45#include <linux/irq.h>
46#include <linux/bootmem.h> 46#include <linux/bootmem.h>
47#include <linux/notifier.h>
48#include <linux/cpu.h>
49#include <linux/percpu.h>
47 50
48#include <linux/delay.h> 51#include <linux/delay.h>
49#include <linux/mc146818rtc.h> 52#include <linux/mc146818rtc.h>
@@ -56,24 +59,46 @@
56#include <smpboot_hooks.h> 59#include <smpboot_hooks.h>
57 60
58/* Set if we find a B stepping CPU */ 61/* Set if we find a B stepping CPU */
59static int __initdata smp_b_stepping; 62static int __devinitdata smp_b_stepping;
60 63
61/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
62int smp_num_siblings = 1; 65int smp_num_siblings = 1;
63int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ 66#ifdef CONFIG_X86_HT
67EXPORT_SYMBOL(smp_num_siblings);
68#endif
69
70/* Package ID of each logical CPU */
71int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
64EXPORT_SYMBOL(phys_proc_id); 72EXPORT_SYMBOL(phys_proc_id);
65int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ 73
74/* Core ID of each logical CPU */
75int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
66EXPORT_SYMBOL(cpu_core_id); 76EXPORT_SYMBOL(cpu_core_id);
67 77
78cpumask_t cpu_sibling_map[NR_CPUS];
79EXPORT_SYMBOL(cpu_sibling_map);
80
81cpumask_t cpu_core_map[NR_CPUS];
82EXPORT_SYMBOL(cpu_core_map);
83
68/* bitmap of online cpus */ 84/* bitmap of online cpus */
69cpumask_t cpu_online_map; 85cpumask_t cpu_online_map;
86EXPORT_SYMBOL(cpu_online_map);
70 87
71cpumask_t cpu_callin_map; 88cpumask_t cpu_callin_map;
72cpumask_t cpu_callout_map; 89cpumask_t cpu_callout_map;
90EXPORT_SYMBOL(cpu_callout_map);
73static cpumask_t smp_commenced_mask; 91static cpumask_t smp_commenced_mask;
74 92
 93/* TSC's upper 32 bits can't be written on earlier CPUs (before Prescott), so
 94 * there is no way to resync an AP against the BP. TBD: for Prescott and
 95 * above, we should use IA64's algorithm
96 */
97static int __devinitdata tsc_sync_disabled;
98
75/* Per CPU bogomips and other parameters */ 99/* Per CPU bogomips and other parameters */
76struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 100struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
101EXPORT_SYMBOL(cpu_data);
77 102
78u8 x86_cpu_to_apicid[NR_CPUS] = 103u8 x86_cpu_to_apicid[NR_CPUS] =
79 { [0 ... NR_CPUS-1] = 0xff }; 104 { [0 ... NR_CPUS-1] = 0xff };
@@ -90,13 +115,16 @@ static int trampoline_exec;
90 115
91static void map_cpu_to_logical_apicid(void); 116static void map_cpu_to_logical_apicid(void);
92 117
118/* State of each CPU. */
119DEFINE_PER_CPU(int, cpu_state) = { 0 };
120
93/* 121/*
94 * Currently trivial. Write the real->protected mode 122 * Currently trivial. Write the real->protected mode
95 * bootstrap into the page concerned. The caller 123 * bootstrap into the page concerned. The caller
96 * has made sure it's suitably aligned. 124 * has made sure it's suitably aligned.
97 */ 125 */
98 126
99static unsigned long __init setup_trampoline(void) 127static unsigned long __devinit setup_trampoline(void)
100{ 128{
101 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); 129 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
102 return virt_to_phys(trampoline_base); 130 return virt_to_phys(trampoline_base);
@@ -126,7 +154,7 @@ void __init smp_alloc_memory(void)
126 * a given CPU 154 * a given CPU
127 */ 155 */
128 156
129static void __init smp_store_cpu_info(int id) 157static void __devinit smp_store_cpu_info(int id)
130{ 158{
131 struct cpuinfo_x86 *c = cpu_data + id; 159 struct cpuinfo_x86 *c = cpu_data + id;
132 160
@@ -199,7 +227,7 @@ static void __init synchronize_tsc_bp (void)
199 unsigned long long t0; 227 unsigned long long t0;
200 unsigned long long sum, avg; 228 unsigned long long sum, avg;
201 long long delta; 229 long long delta;
202 unsigned long one_usec; 230 unsigned int one_usec;
203 int buggy = 0; 231 int buggy = 0;
204 232
205 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); 233 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
@@ -320,7 +348,7 @@ extern void calibrate_delay(void);
320 348
321static atomic_t init_deasserted; 349static atomic_t init_deasserted;
322 350
323static void __init smp_callin(void) 351static void __devinit smp_callin(void)
324{ 352{
325 int cpuid, phys_id; 353 int cpuid, phys_id;
326 unsigned long timeout; 354 unsigned long timeout;
@@ -405,16 +433,48 @@ static void __init smp_callin(void)
405 /* 433 /*
406 * Synchronize the TSC with the BP 434 * Synchronize the TSC with the BP
407 */ 435 */
408 if (cpu_has_tsc && cpu_khz) 436 if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
409 synchronize_tsc_ap(); 437 synchronize_tsc_ap();
410} 438}
411 439
412static int cpucount; 440static int cpucount;
413 441
442static inline void
443set_cpu_sibling_map(int cpu)
444{
445 int i;
446
447 if (smp_num_siblings > 1) {
448 for (i = 0; i < NR_CPUS; i++) {
449 if (!cpu_isset(i, cpu_callout_map))
450 continue;
451 if (cpu_core_id[cpu] == cpu_core_id[i]) {
452 cpu_set(i, cpu_sibling_map[cpu]);
453 cpu_set(cpu, cpu_sibling_map[i]);
454 }
455 }
456 } else {
457 cpu_set(cpu, cpu_sibling_map[cpu]);
458 }
459
460 if (current_cpu_data.x86_num_cores > 1) {
461 for (i = 0; i < NR_CPUS; i++) {
462 if (!cpu_isset(i, cpu_callout_map))
463 continue;
464 if (phys_proc_id[cpu] == phys_proc_id[i]) {
465 cpu_set(i, cpu_core_map[cpu]);
466 cpu_set(cpu, cpu_core_map[i]);
467 }
468 }
469 } else {
470 cpu_core_map[cpu] = cpu_sibling_map[cpu];
471 }
472}
473
414/* 474/*
415 * Activate a secondary processor. 475 * Activate a secondary processor.
416 */ 476 */
417static void __init start_secondary(void *unused) 477static void __devinit start_secondary(void *unused)
418{ 478{
419 /* 479 /*
 420 * Don't put anything before smp_callin(), SMP 480
@@ -437,7 +497,23 @@ static void __init start_secondary(void *unused)
437 * the local TLBs too. 497 * the local TLBs too.
438 */ 498 */
439 local_flush_tlb(); 499 local_flush_tlb();
500
501 /* This must be done before setting cpu_online_map */
502 set_cpu_sibling_map(raw_smp_processor_id());
503 wmb();
504
505 /*
506 * We need to hold call_lock, so there is no inconsistency
 507 * between the time smp_call_function() determines the number of
 508 * IPI recipients and the time when the determination is made
 509 * as to which cpus receive the IPI. Holding this
 510 * lock keeps this cpu out of a currently in-progress
511 * smp_call_function().
512 */
513 lock_ipi_call_lock();
440 cpu_set(smp_processor_id(), cpu_online_map); 514 cpu_set(smp_processor_id(), cpu_online_map);
515 unlock_ipi_call_lock();
516 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
441 517
442 /* We can take interrupts now: we're officially "up". */ 518 /* We can take interrupts now: we're officially "up". */
443 local_irq_enable(); 519 local_irq_enable();
@@ -452,7 +528,7 @@ static void __init start_secondary(void *unused)
452 * from the task structure 528 * from the task structure
453 * This function must not return. 529 * This function must not return.
454 */ 530 */
455void __init initialize_secondary(void) 531void __devinit initialize_secondary(void)
456{ 532{
457 /* 533 /*
458 * We don't actually need to load the full TSS, 534 * We don't actually need to load the full TSS,
@@ -566,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid)
566 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 642 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
567 * won't ... remember to clear down the APIC, etc later. 643 * won't ... remember to clear down the APIC, etc later.
568 */ 644 */
569static int __init 645static int __devinit
570wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 646wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
571{ 647{
572 unsigned long send_status = 0, accept_status = 0; 648 unsigned long send_status = 0, accept_status = 0;
@@ -612,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
612#endif /* WAKE_SECONDARY_VIA_NMI */ 688#endif /* WAKE_SECONDARY_VIA_NMI */
613 689
614#ifdef WAKE_SECONDARY_VIA_INIT 690#ifdef WAKE_SECONDARY_VIA_INIT
615static int __init 691static int __devinit
616wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) 692wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
617{ 693{
618 unsigned long send_status = 0, accept_status = 0; 694 unsigned long send_status = 0, accept_status = 0;
@@ -747,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
747#endif /* WAKE_SECONDARY_VIA_INIT */ 823#endif /* WAKE_SECONDARY_VIA_INIT */
748 824
749extern cpumask_t cpu_initialized; 825extern cpumask_t cpu_initialized;
826static inline int alloc_cpu_id(void)
827{
828 cpumask_t tmp_map;
829 int cpu;
830 cpus_complement(tmp_map, cpu_present_map);
831 cpu = first_cpu(tmp_map);
832 if (cpu >= NR_CPUS)
833 return -ENODEV;
834 return cpu;
835}
836
837#ifdef CONFIG_HOTPLUG_CPU
838static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
839static inline struct task_struct * alloc_idle_task(int cpu)
840{
841 struct task_struct *idle;
842
843 if ((idle = cpu_idle_tasks[cpu]) != NULL) {
 844 /* initialize thread_struct; we really want to avoid destroying
 845 * the idle thread
846 */
847 idle->thread.esp = (unsigned long)(((struct pt_regs *)
848 (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
849 init_idle(idle, cpu);
850 return idle;
851 }
852 idle = fork_idle(cpu);
750 853
751static int __init do_boot_cpu(int apicid) 854 if (!IS_ERR(idle))
855 cpu_idle_tasks[cpu] = idle;
856 return idle;
857}
858#else
859#define alloc_idle_task(cpu) fork_idle(cpu)
860#endif
861
862static int __devinit do_boot_cpu(int apicid, int cpu)
752/* 863/*
753 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 864 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
754 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 865 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -757,16 +868,17 @@ static int __init do_boot_cpu(int apicid)
757{ 868{
758 struct task_struct *idle; 869 struct task_struct *idle;
759 unsigned long boot_error; 870 unsigned long boot_error;
760 int timeout, cpu; 871 int timeout;
761 unsigned long start_eip; 872 unsigned long start_eip;
762 unsigned short nmi_high = 0, nmi_low = 0; 873 unsigned short nmi_high = 0, nmi_low = 0;
763 874
764 cpu = ++cpucount; 875 ++cpucount;
876
765 /* 877 /*
766 * We can't use kernel_thread since we must avoid to 878 * We can't use kernel_thread since we must avoid to
767 * reschedule the child. 879 * reschedule the child.
768 */ 880 */
769 idle = fork_idle(cpu); 881 idle = alloc_idle_task(cpu);
770 if (IS_ERR(idle)) 882 if (IS_ERR(idle))
771 panic("failed fork for CPU %d", cpu); 883 panic("failed fork for CPU %d", cpu);
772 idle->thread.eip = (unsigned long) start_secondary; 884 idle->thread.eip = (unsigned long) start_secondary;
@@ -833,13 +945,16 @@ static int __init do_boot_cpu(int apicid)
833 inquire_remote_apic(apicid); 945 inquire_remote_apic(apicid);
834 } 946 }
835 } 947 }
836 x86_cpu_to_apicid[cpu] = apicid; 948
837 if (boot_error) { 949 if (boot_error) {
838 /* Try to put things back the way they were before ... */ 950 /* Try to put things back the way they were before ... */
839 unmap_cpu_to_logical_apicid(cpu); 951 unmap_cpu_to_logical_apicid(cpu);
840 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ 952 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
841 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 953 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
842 cpucount--; 954 cpucount--;
955 } else {
956 x86_cpu_to_apicid[cpu] = apicid;
957 cpu_set(cpu, cpu_present_map);
843 } 958 }
844 959
845 /* mark "stuck" area as not stuck */ 960 /* mark "stuck" area as not stuck */
@@ -848,6 +963,75 @@ static int __init do_boot_cpu(int apicid)
848 return boot_error; 963 return boot_error;
849} 964}
850 965
966#ifdef CONFIG_HOTPLUG_CPU
967void cpu_exit_clear(void)
968{
969 int cpu = raw_smp_processor_id();
970
971 idle_task_exit();
972
973 cpucount --;
974 cpu_uninit();
975 irq_ctx_exit(cpu);
976
977 cpu_clear(cpu, cpu_callout_map);
978 cpu_clear(cpu, cpu_callin_map);
979 cpu_clear(cpu, cpu_present_map);
980
981 cpu_clear(cpu, smp_commenced_mask);
982 unmap_cpu_to_logical_apicid(cpu);
983}
984
985struct warm_boot_cpu_info {
986 struct completion *complete;
987 int apicid;
988 int cpu;
989};
990
991static void __devinit do_warm_boot_cpu(void *p)
992{
993 struct warm_boot_cpu_info *info = p;
994 do_boot_cpu(info->apicid, info->cpu);
995 complete(info->complete);
996}
997
998int __devinit smp_prepare_cpu(int cpu)
999{
1000 DECLARE_COMPLETION(done);
1001 struct warm_boot_cpu_info info;
1002 struct work_struct task;
1003 int apicid, ret;
1004
1005 lock_cpu_hotplug();
1006 apicid = x86_cpu_to_apicid[cpu];
1007 if (apicid == BAD_APICID) {
1008 ret = -ENODEV;
1009 goto exit;
1010 }
1011
1012 info.complete = &done;
1013 info.apicid = apicid;
1014 info.cpu = cpu;
1015 INIT_WORK(&task, do_warm_boot_cpu, &info);
1016
1017 tsc_sync_disabled = 1;
1018
1019 /* init low mem mapping */
1020 memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
1021 sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
1022 flush_tlb_all();
1023 schedule_work(&task);
1024 wait_for_completion(&done);
1025
1026 tsc_sync_disabled = 0;
1027 zap_low_mappings();
1028 ret = 0;
1029exit:
1030 unlock_cpu_hotplug();
1031 return ret;
1032}
1033#endif
1034
851static void smp_tune_scheduling (void) 1035static void smp_tune_scheduling (void)
852{ 1036{
853 unsigned long cachesize; /* kB */ 1037 unsigned long cachesize; /* kB */
@@ -885,10 +1069,9 @@ static void smp_tune_scheduling (void)
885static int boot_cpu_logical_apicid; 1069static int boot_cpu_logical_apicid;
886/* Where the IO area was mapped on multiquad, always 0 otherwise */ 1070/* Where the IO area was mapped on multiquad, always 0 otherwise */
887void *xquad_portio; 1071void *xquad_portio;
888 1072#ifdef CONFIG_X86_NUMAQ
889cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 1073EXPORT_SYMBOL(xquad_portio);
890cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 1074#endif
891EXPORT_SYMBOL(cpu_core_map);
892 1075
893static void __init smp_boot_cpus(unsigned int max_cpus) 1076static void __init smp_boot_cpus(unsigned int max_cpus)
894{ 1077{
@@ -1001,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1001 if (max_cpus <= cpucount+1) 1184 if (max_cpus <= cpucount+1)
1002 continue; 1185 continue;
1003 1186
1004 if (do_boot_cpu(apicid)) 1187 if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
1005 printk("CPU #%d not responding - cannot use it.\n", 1188 printk("CPU #%d not responding - cannot use it.\n",
1006 apicid); 1189 apicid);
1007 else 1190 else
@@ -1053,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1053 cpus_clear(cpu_core_map[cpu]); 1236 cpus_clear(cpu_core_map[cpu]);
1054 } 1237 }
1055 1238
1056 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1239 cpu_set(0, cpu_sibling_map[0]);
1057 struct cpuinfo_x86 *c = cpu_data + cpu; 1240 cpu_set(0, cpu_core_map[0]);
1058 int siblings = 0;
1059 int i;
1060 if (!cpu_isset(cpu, cpu_callout_map))
1061 continue;
1062
1063 if (smp_num_siblings > 1) {
1064 for (i = 0; i < NR_CPUS; i++) {
1065 if (!cpu_isset(i, cpu_callout_map))
1066 continue;
1067 if (cpu_core_id[cpu] == cpu_core_id[i]) {
1068 siblings++;
1069 cpu_set(i, cpu_sibling_map[cpu]);
1070 }
1071 }
1072 } else {
1073 siblings++;
1074 cpu_set(cpu, cpu_sibling_map[cpu]);
1075 }
1076
1077 if (siblings != smp_num_siblings) {
1078 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1079 smp_num_siblings = siblings;
1080 }
1081
1082 if (c->x86_num_cores > 1) {
1083 for (i = 0; i < NR_CPUS; i++) {
1084 if (!cpu_isset(i, cpu_callout_map))
1085 continue;
1086 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1087 cpu_set(i, cpu_core_map[cpu]);
1088 }
1089 }
1090 } else {
1091 cpu_core_map[cpu] = cpu_sibling_map[cpu];
1092 }
1093 }
1094 1241
1095 smpboot_setup_io_apic(); 1242 smpboot_setup_io_apic();
1096 1243
@@ -1107,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1107 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ 1254 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1108void __init smp_prepare_cpus(unsigned int max_cpus) 1255void __init smp_prepare_cpus(unsigned int max_cpus)
1109{ 1256{
1257 smp_commenced_mask = cpumask_of_cpu(0);
1258 cpu_callin_map = cpumask_of_cpu(0);
1259 mb();
1110 smp_boot_cpus(max_cpus); 1260 smp_boot_cpus(max_cpus);
1111} 1261}
1112 1262
@@ -1114,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void)
1114{ 1264{
1115 cpu_set(smp_processor_id(), cpu_online_map); 1265 cpu_set(smp_processor_id(), cpu_online_map);
1116 cpu_set(smp_processor_id(), cpu_callout_map); 1266 cpu_set(smp_processor_id(), cpu_callout_map);
1267 cpu_set(smp_processor_id(), cpu_present_map);
1268 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
1117} 1269}
1118 1270
1119int __devinit __cpu_up(unsigned int cpu) 1271#ifdef CONFIG_HOTPLUG_CPU
1272static void
1273remove_siblinginfo(int cpu)
1120{ 1274{
1121 /* This only works at boot for x86. See "rewrite" above. */ 1275 int sibling;
1122 if (cpu_isset(cpu, smp_commenced_mask)) { 1276
1123 local_irq_enable(); 1277 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1124 return -ENOSYS; 1278 cpu_clear(cpu, cpu_sibling_map[sibling]);
1279 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1280 cpu_clear(cpu, cpu_core_map[sibling]);
1281 cpus_clear(cpu_sibling_map[cpu]);
1282 cpus_clear(cpu_core_map[cpu]);
1283 phys_proc_id[cpu] = BAD_APICID;
1284 cpu_core_id[cpu] = BAD_APICID;
1285}
1286
1287int __cpu_disable(void)
1288{
1289 cpumask_t map = cpu_online_map;
1290 int cpu = smp_processor_id();
1291
1292 /*
1293 * Perhaps use cpufreq to drop frequency, but that could go
1294 * into generic code.
1295 *
 1296 * We won't take down the boot processor on i386 because some
 1297 * interrupts can only be serviced by the BSP, especially so
 1298 * if we're not using an IO-APIC. -zwane
1299 */
1300 if (cpu == 0)
1301 return -EBUSY;
1302
1303 /* We enable the timer again on the exit path of the death loop */
1304 disable_APIC_timer();
1305 /* Allow any queued timer interrupts to get serviced */
1306 local_irq_enable();
1307 mdelay(1);
1308 local_irq_disable();
1309
1310 remove_siblinginfo(cpu);
1311
1312 cpu_clear(cpu, map);
1313 fixup_irqs(map);
1314 /* It's now safe to remove this processor from the online map */
1315 cpu_clear(cpu, cpu_online_map);
1316 return 0;
1317}
1318
1319void __cpu_die(unsigned int cpu)
1320{
1321 /* We don't do anything here: idle task is faking death itself. */
1322 unsigned int i;
1323
1324 for (i = 0; i < 10; i++) {
1325 /* They ack this in play_dead by setting CPU_DEAD */
1326 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1327 printk ("CPU %d is now offline\n", cpu);
1328 return;
1329 }
1330 current->state = TASK_UNINTERRUPTIBLE;
1331 schedule_timeout(HZ/10);
1125 } 1332 }
1333 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1334}
1335#else /* ... !CONFIG_HOTPLUG_CPU */
1336int __cpu_disable(void)
1337{
1338 return -ENOSYS;
1339}
1126 1340
1341void __cpu_die(unsigned int cpu)
1342{
1343 /* We said "no" in __cpu_disable */
1344 BUG();
1345}
1346#endif /* CONFIG_HOTPLUG_CPU */
1347
1348int __devinit __cpu_up(unsigned int cpu)
1349{
1127 /* In case one didn't come up */ 1350 /* In case one didn't come up */
1128 if (!cpu_isset(cpu, cpu_callin_map)) { 1351 if (!cpu_isset(cpu, cpu_callin_map)) {
1352 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1129 local_irq_enable(); 1353 local_irq_enable();
1130 return -EIO; 1354 return -EIO;
1131 } 1355 }
1132 1356
1133 local_irq_enable(); 1357 local_irq_enable();
1358 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1134 /* Unleash the CPU! */ 1359 /* Unleash the CPU! */
1135 cpu_set(cpu, smp_commenced_mask); 1360 cpu_set(cpu, smp_commenced_mask);
1136 while (!cpu_isset(cpu, cpu_online_map)) 1361 while (!cpu_isset(cpu, cpu_online_map))
@@ -1144,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
1144 setup_ioapic_dest(); 1369 setup_ioapic_dest();
1145#endif 1370#endif
1146 zap_low_mappings(); 1371 zap_low_mappings();
1372#ifndef CONFIG_HOTPLUG_CPU
1147 /* 1373 /*
1148 * Disable executability of the SMP trampoline: 1374 * Disable executability of the SMP trampoline:
1149 */ 1375 */
1150 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); 1376 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1377#endif
1151} 1378}
1152 1379
1153void __init smp_intr_init(void) 1380void __init smp_intr_init(void)
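
For a warm boot, smp_prepare_cpu() above copies the kernel half of swapper_pg_dir over the user half because the woken AP briefly executes at physical addresses before it switches to running at PAGE_OFFSET; zap_low_mappings() tears the alias down once the CPU is up. The index arithmetic, assuming the usual i386 3G/1G split without PAE (an assumption; the constants are defined outside this diff):

#include <stdio.h>

#define PAGE_OFFSET	0xC0000000UL	/* assumed __PAGE_OFFSET */
#define PGDIR_SHIFT	22		/* 4MB per PGD entry, non-PAE */
#define PTRS_PER_PGD	1024
#define USER_PGD_PTRS	(PAGE_OFFSET >> PGDIR_SHIFT)	/* 768 low slots */
#define KERNEL_PGD_PTRS	(PTRS_PER_PGD - USER_PGD_PTRS)	/* 256 kernel slots */

int main(void)
{
	/* The memcpy() in smp_prepare_cpu() mirrors KERNEL_PGD_PTRS
	 * entries from index USER_PGD_PTRS down to index 0, so virtual
	 * address V and V + PAGE_OFFSET briefly map the same memory. */
	printf("mirror %lu kernel PGD slots over the first %lu user slots\n",
	       KERNEL_PGD_PTRS, USER_PGD_PTRS);
	return 0;
}
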
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 6cd1ed311f02..3db9a04aec6e 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
251 .long sys_io_submit 251 .long sys_io_submit
252 .long sys_io_cancel 252 .long sys_io_cancel
253 .long sys_fadvise64 /* 250 */ 253 .long sys_fadvise64 /* 250 */
254 .long sys_ni_syscall 254 .long sys_set_zone_reclaim
255 .long sys_exit_group 255 .long sys_exit_group
256 .long sys_lookup_dcookie 256 .long sys_lookup_dcookie
257 .long sys_epoll_create 257 .long sys_epoll_create
@@ -283,9 +283,11 @@ ENTRY(sys_call_table)
283 .long sys_mq_timedreceive /* 280 */ 283 .long sys_mq_timedreceive /* 280 */
284 .long sys_mq_notify 284 .long sys_mq_notify
285 .long sys_mq_getsetattr 285 .long sys_mq_getsetattr
286 .long sys_ni_syscall /* reserved for kexec */ 286 .long sys_kexec_load
287 .long sys_waitid 287 .long sys_waitid
288 .long sys_ni_syscall /* 285 */ /* available */ 288 .long sys_ni_syscall /* 285 */ /* available */
289 .long sys_add_key 289 .long sys_add_key
290 .long sys_request_key 290 .long sys_request_key
291 .long sys_keyctl 291 .long sys_keyctl
292 .long sys_ioprio_set
293 .long sys_ioprio_get /* 290 */
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 960d8bd137d0..0bada1870bdf 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -21,11 +21,16 @@
21 21
22extern asmlinkage void sysenter_entry(void); 22extern asmlinkage void sysenter_entry(void);
23 23
24void enable_sep_cpu(void *info) 24void enable_sep_cpu(void)
25{ 25{
26 int cpu = get_cpu(); 26 int cpu = get_cpu();
27 struct tss_struct *tss = &per_cpu(init_tss, cpu); 27 struct tss_struct *tss = &per_cpu(init_tss, cpu);
28 28
29 if (!boot_cpu_has(X86_FEATURE_SEP)) {
30 put_cpu();
31 return;
32 }
33
29 tss->ss1 = __KERNEL_CS; 34 tss->ss1 = __KERNEL_CS;
30 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; 35 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
31 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 36 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
@@ -41,7 +46,7 @@ void enable_sep_cpu(void *info)
41extern const char vsyscall_int80_start, vsyscall_int80_end; 46extern const char vsyscall_int80_start, vsyscall_int80_end;
42extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 47extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
43 48
44static int __init sysenter_setup(void) 49int __init sysenter_setup(void)
45{ 50{
46 void *page = (void *)get_zeroed_page(GFP_ATOMIC); 51 void *page = (void *)get_zeroed_page(GFP_ATOMIC);
47 52
@@ -58,8 +63,5 @@ static int __init sysenter_setup(void)
58 &vsyscall_sysenter_start, 63 &vsyscall_sysenter_start,
59 &vsyscall_sysenter_end - &vsyscall_sysenter_start); 64 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
60 65
61 on_each_cpu(enable_sep_cpu, NULL, 1, 1);
62 return 0; 66 return 0;
63} 67}
64
65__initcall(sysenter_setup);
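
enable_sep_cpu() above drops its on_each_cpu() callback signature and checks for SEP itself, since each CPU, including a hot-plugged one, now enables SYSENTER during its own bringup rather than from a single initcall. One detail worth noting is that the new early return still rebalances get_cpu(); a compilable model of that pairing:

#include <stdio.h>

static int preempt_count;		/* models preemption disabling */

static int get_cpu(void)  { preempt_count++; return 0; }
static void put_cpu(void) { preempt_count--; }

/* Models the new enable_sep_cpu(): every exit path, including the
 * !X86_FEATURE_SEP bail-out, must undo get_cpu(). */
static void enable_sep_cpu_model(int has_sep)
{
	int cpu = get_cpu();

	if (!has_sep) {
		put_cpu();
		return;
	}
	printf("cpu %d: programming SYSENTER MSRs\n", cpu);
	put_cpu();
}

int main(void)
{
	enable_sep_cpu_model(0);
	enable_sep_cpu_model(1);
	return preempt_count;		/* 0 when the pairing is balanced */
}
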
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a0dcb7c87c30..2854c357377f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -68,7 +68,8 @@
68 68
69#include "io_ports.h" 69#include "io_ports.h"
70 70
71extern spinlock_t i8259A_lock; 71#include <asm/i8259.h>
72
72int pit_latch_buggy; /* extern */ 73int pit_latch_buggy; /* extern */
73 74
74#include "do_timer.h" 75#include "do_timer.h"
@@ -77,11 +78,15 @@ u64 jiffies_64 = INITIAL_JIFFIES;
77 78
78EXPORT_SYMBOL(jiffies_64); 79EXPORT_SYMBOL(jiffies_64);
79 80
80unsigned long cpu_khz; /* Detected as we calibrate the TSC */ 81unsigned int cpu_khz; /* Detected as we calibrate the TSC */
82EXPORT_SYMBOL(cpu_khz);
81 83
82extern unsigned long wall_jiffies; 84extern unsigned long wall_jiffies;
83 85
84DEFINE_SPINLOCK(rtc_lock); 86DEFINE_SPINLOCK(rtc_lock);
87EXPORT_SYMBOL(rtc_lock);
88
89#include <asm/i8253.h>
85 90
86DEFINE_SPINLOCK(i8253_lock); 91DEFINE_SPINLOCK(i8253_lock);
87EXPORT_SYMBOL(i8253_lock); 92EXPORT_SYMBOL(i8253_lock);
@@ -324,6 +329,8 @@ unsigned long get_cmos_time(void)
324 329
325 return retval; 330 return retval;
326} 331}
332EXPORT_SYMBOL(get_cmos_time);
333
327static void sync_cmos_clock(unsigned long dummy); 334static void sync_cmos_clock(unsigned long dummy);
328 335
329static struct timer_list sync_cmos_timer = 336static struct timer_list sync_cmos_timer =
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 10a0cbb88e75..658c0629ba6a 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a)
50 * comparator value and continue. Next tick can be caught by checking 50 * comparator value and continue. Next tick can be caught by checking
51 * for a change in the comparator value. Used in apic.c. 51 * for a change in the comparator value. Used in apic.c.
52 */ 52 */
53static void __init wait_hpet_tick(void) 53static void __devinit wait_hpet_tick(void)
54{ 54{
55 unsigned int start_cmp_val, end_cmp_val; 55 unsigned int start_cmp_val, end_cmp_val;
56 56
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
index 8e201219f525..8163fe0cf1f0 100644
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -86,7 +86,7 @@ bad_ctc:
86#define CALIBRATE_CNT_HPET (5 * hpet_tick) 86#define CALIBRATE_CNT_HPET (5 * hpet_tick)
87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) 87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
88 88
89unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) 89unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
90{ 90{
91 unsigned long tsc_startlow, tsc_starthigh; 91 unsigned long tsc_startlow, tsc_starthigh;
92 unsigned long tsc_endlow, tsc_endhigh; 92 unsigned long tsc_endlow, tsc_endhigh;
@@ -139,6 +139,15 @@ bad_calibration:
139} 139}
140#endif 140#endif
141 141
142
143unsigned long read_timer_tsc(void)
144{
145 unsigned long retval;
146 rdtscl(retval);
147 return retval;
148}
149
150
142/* calculate cpu_khz */ 151/* calculate cpu_khz */
143void init_cpu_khz(void) 152void init_cpu_khz(void)
144{ 153{
@@ -154,7 +163,8 @@ void init_cpu_khz(void)
154 :"=a" (cpu_khz), "=d" (edx) 163 :"=a" (cpu_khz), "=d" (edx)
155 :"r" (tsc_quotient), 164 :"r" (tsc_quotient),
156 "0" (eax), "1" (edx)); 165 "0" (eax), "1" (edx));
157 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); 166 printk("Detected %u.%03u MHz processor.\n",
167 cpu_khz / 1000, cpu_khz % 1000);
158 } 168 }
159 } 169 }
160 } 170 }
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
index a3d6a288088b..7e39ed8e33f8 100644
--- a/arch/i386/kernel/timers/timer.c
+++ b/arch/i386/kernel/timers/timer.c
@@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void)
64 panic("select_timer: Cannot find a suitable timer\n"); 64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL; 65 return NULL;
66} 66}
67
68int read_current_timer(unsigned long *timer_val)
69{
70 if (cur_timer->read_timer) {
71 *timer_val = cur_timer->read_timer();
72 return 0;
73 }
74 return -1;
75}
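
read_current_timer() above lets generic code sample the active timer whenever its timer_opts supplies .read_timer, which the hunks below wire to read_timer_tsc for the HPET, PM-timer and TSC backends. A sketch of the error-checked call pattern, with a stand-in for the hook:

#include <stdio.h>

/* Stand-in for the hook: returns 0 and fills *timer_val when the
 * active backend has .read_timer, -1 otherwise. */
static int read_current_timer(unsigned long *timer_val)
{
	*timer_val = 12345;	/* the real hook would use rdtscl() */
	return 0;
}

int main(void)
{
	unsigned long t0, t1;

	if (read_current_timer(&t0) == 0) {
		/* ... timed work ... */
		read_current_timer(&t1);
		printf("elapsed ticks: %lu\n", t1 - t0);
	} else {
		printf("no .read_timer on this backend\n");
	}
	return 0;
}
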
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
index f6f1206a11bb..13892a65c941 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -17,9 +17,9 @@
17#include <asm/io.h> 17#include <asm/io.h>
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/fixmap.h> 19#include <asm/fixmap.h>
20#include "io_ports.h" 20#include <asm/i8253.h>
21 21
22extern spinlock_t i8253_lock; 22#include "io_ports.h"
23 23
24/* Number of usecs that the last interrupt was delayed */ 24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt; 25static int delay_at_last_interrupt;
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index f778f471a09a..d766e0963ac1 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -158,7 +158,7 @@ static int __init init_hpet(char* override)
158 { unsigned long eax=0, edx=1000; 158 { unsigned long eax=0, edx=1000;
159 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, 159 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
160 eax, edx); 160 eax, edx);
161 printk("Detected %lu.%03lu MHz processor.\n", 161 printk("Detected %u.%03u MHz processor.\n",
162 cpu_khz / 1000, cpu_khz % 1000); 162 cpu_khz / 1000, cpu_khz % 1000);
163 } 163 }
164 set_cyc2ns_scale(cpu_khz/1000); 164 set_cyc2ns_scale(cpu_khz/1000);
@@ -186,6 +186,7 @@ static struct timer_opts timer_hpet = {
186 .get_offset = get_offset_hpet, 186 .get_offset = get_offset_hpet,
187 .monotonic_clock = monotonic_clock_hpet, 187 .monotonic_clock = monotonic_clock_hpet,
188 .delay = delay_hpet, 188 .delay = delay_hpet,
189 .read_timer = read_timer_tsc,
189}; 190};
190 191
191struct init_timer_opts __initdata timer_hpet_init = { 192struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 967d5453cd0e..06de036a820c 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -15,9 +15,8 @@
15#include <asm/smp.h> 15#include <asm/smp.h>
16#include <asm/io.h> 16#include <asm/io.h>
17#include <asm/arch_hooks.h> 17#include <asm/arch_hooks.h>
18#include <asm/i8253.h>
18 19
19extern spinlock_t i8259A_lock;
20extern spinlock_t i8253_lock;
21#include "do_timer.h" 20#include "do_timer.h"
22#include "io_ports.h" 21#include "io_ports.h"
23 22
@@ -166,7 +165,6 @@ struct init_timer_opts __initdata timer_pit_init = {
166 165
167void setup_pit_timer(void) 166void setup_pit_timer(void)
168{ 167{
169 extern spinlock_t i8253_lock;
170 unsigned long flags; 168 unsigned long flags;
171 169
172 spin_lock_irqsave(&i8253_lock, flags); 170 spin_lock_irqsave(&i8253_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index d77f22030fe6..4ef20e663498 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = {
246 .get_offset = get_offset_pmtmr, 246 .get_offset = get_offset_pmtmr,
247 .monotonic_clock = monotonic_clock_pmtmr, 247 .monotonic_clock = monotonic_clock_pmtmr,
248 .delay = delay_pmtmr, 248 .delay = delay_pmtmr,
249 .read_timer = read_timer_tsc,
249}; 250};
250 251
251struct init_timer_opts __initdata timer_pmtmr_init = { 252struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 180444d87824..8f4e4d5bc560 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -24,6 +24,7 @@
24#include "mach_timer.h" 24#include "mach_timer.h"
25 25
26#include <asm/hpet.h> 26#include <asm/hpet.h>
27#include <asm/i8253.h>
27 28
28#ifdef CONFIG_HPET_TIMER 29#ifdef CONFIG_HPET_TIMER
29static unsigned long hpet_usec_quotient; 30static unsigned long hpet_usec_quotient;
@@ -33,9 +34,7 @@ static struct timer_opts timer_tsc;
33 34
34static inline void cpufreq_delayed_get(void); 35static inline void cpufreq_delayed_get(void);
35 36
36int tsc_disable __initdata = 0; 37int tsc_disable __devinitdata = 0;
37
38extern spinlock_t i8253_lock;
39 38
40static int use_tsc; 39static int use_tsc;
41/* Number of usecs that the last interrupt was delayed */ 40/* Number of usecs that the last interrupt was delayed */
@@ -256,7 +255,7 @@ static unsigned long loops_per_jiffy_ref = 0;
256 255
257#ifndef CONFIG_SMP 256#ifndef CONFIG_SMP
258static unsigned long fast_gettimeoffset_ref = 0; 257static unsigned long fast_gettimeoffset_ref = 0;
259static unsigned long cpu_khz_ref = 0; 258static unsigned int cpu_khz_ref = 0;
260#endif 259#endif
261 260
262static int 261static int
@@ -323,7 +322,7 @@ static inline void cpufreq_delayed_get(void) { return; }
323int recalibrate_cpu_khz(void) 322int recalibrate_cpu_khz(void)
324{ 323{
325#ifndef CONFIG_SMP 324#ifndef CONFIG_SMP
326 unsigned long cpu_khz_old = cpu_khz; 325 unsigned int cpu_khz_old = cpu_khz;
327 326
328 if (cpu_has_tsc) { 327 if (cpu_has_tsc) {
329 init_cpu_khz(); 328 init_cpu_khz();
@@ -534,7 +533,8 @@ static int __init init_tsc(char* override)
534 :"=a" (cpu_khz), "=d" (edx) 533 :"=a" (cpu_khz), "=d" (edx)
535 :"r" (tsc_quotient), 534 :"r" (tsc_quotient),
536 "0" (eax), "1" (edx)); 535 "0" (eax), "1" (edx));
537 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); 536 printk("Detected %u.%03u MHz processor.\n",
537 cpu_khz / 1000, cpu_khz % 1000);
538 } 538 }
539 set_cyc2ns_scale(cpu_khz/1000); 539 set_cyc2ns_scale(cpu_khz/1000);
540 return 0; 540 return 0;
@@ -572,6 +572,7 @@ static struct timer_opts timer_tsc = {
572 .get_offset = get_offset_tsc, 572 .get_offset = get_offset_tsc,
573 .monotonic_clock = monotonic_clock_tsc, 573 .monotonic_clock = monotonic_clock_tsc,
574 .delay = delay_tsc, 574 .delay = delay_tsc,
575 .read_timer = read_timer_tsc,
575}; 576};
576 577
577struct init_timer_opts __initdata timer_tsc_init = { 578struct init_timer_opts __initdata timer_tsc_init = {
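
cpu_khz and its companions shrink from unsigned long to unsigned int across these hunks, and every printk switches %lu to %u in step; a mismatched specifier is a -Wformat warning here and genuinely wrong on ABIs where the two sizes differ. A minimal illustration (the khz value is invented):

#include <stdio.h>

int main(void)
{
	unsigned int cpu_khz = 2800456;		/* example value */

	/* %u matches unsigned int; the old %lu no longer would. */
	printf("Detected %u.%03u MHz processor.\n",
	       cpu_khz / 1000, cpu_khz % 1000);
	return 0;
}
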
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 00c63419c06f..a61f33d06ea3 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -27,6 +27,7 @@
27#include <linux/ptrace.h> 27#include <linux/ptrace.h>
28#include <linux/utsname.h> 28#include <linux/utsname.h>
29#include <linux/kprobes.h> 29#include <linux/kprobes.h>
30#include <linux/kexec.h>
30 31
31#ifdef CONFIG_EISA 32#ifdef CONFIG_EISA
32#include <linux/ioport.h> 33#include <linux/ioport.h>
@@ -104,6 +105,7 @@ int register_die_notifier(struct notifier_block *nb)
104 spin_unlock_irqrestore(&die_notifier_lock, flags); 105 spin_unlock_irqrestore(&die_notifier_lock, flags);
105 return err; 106 return err;
106} 107}
108EXPORT_SYMBOL(register_die_notifier);
107 109
108static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) 110static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
109{ 111{
@@ -209,7 +211,7 @@ void show_registers(struct pt_regs *regs)
209 211
210 esp = (unsigned long) (&regs->esp); 212 esp = (unsigned long) (&regs->esp);
211 ss = __KERNEL_DS; 213 ss = __KERNEL_DS;
212 if (regs->xcs & 3) { 214 if (user_mode(regs)) {
213 in_kernel = 0; 215 in_kernel = 0;
214 esp = regs->esp; 216 esp = regs->esp;
215 ss = regs->xss & 0xffff; 217 ss = regs->xss & 0xffff;
@@ -233,22 +235,22 @@ void show_registers(struct pt_regs *regs)
233 * time of the fault.. 235 * time of the fault..
234 */ 236 */
235 if (in_kernel) { 237 if (in_kernel) {
236 u8 *eip; 238 u8 __user *eip;
237 239
238 printk("\nStack: "); 240 printk("\nStack: ");
239 show_stack(NULL, (unsigned long*)esp); 241 show_stack(NULL, (unsigned long*)esp);
240 242
241 printk("Code: "); 243 printk("Code: ");
242 244
243 eip = (u8 *)regs->eip - 43; 245 eip = (u8 __user *)regs->eip - 43;
244 for (i = 0; i < 64; i++, eip++) { 246 for (i = 0; i < 64; i++, eip++) {
245 unsigned char c; 247 unsigned char c;
246 248
247 if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) { 249 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
248 printk(" Bad EIP value."); 250 printk(" Bad EIP value.");
249 break; 251 break;
250 } 252 }
251 if (eip == (u8 *)regs->eip) 253 if (eip == (u8 __user *)regs->eip)
252 printk("<%02x> ", c); 254 printk("<%02x> ", c);
253 else 255 else
254 printk("%02x ", c); 256 printk("%02x ", c);
@@ -265,20 +267,20 @@ static void handle_BUG(struct pt_regs *regs)
265 char c; 267 char c;
266 unsigned long eip; 268 unsigned long eip;
267 269
268 if (regs->xcs & 3) 270 if (user_mode(regs))
269 goto no_bug; /* Not in kernel */ 271 goto no_bug; /* Not in kernel */
270 272
271 eip = regs->eip; 273 eip = regs->eip;
272 274
273 if (eip < PAGE_OFFSET) 275 if (eip < PAGE_OFFSET)
274 goto no_bug; 276 goto no_bug;
275 if (__get_user(ud2, (unsigned short *)eip)) 277 if (__get_user(ud2, (unsigned short __user *)eip))
276 goto no_bug; 278 goto no_bug;
277 if (ud2 != 0x0b0f) 279 if (ud2 != 0x0b0f)
278 goto no_bug; 280 goto no_bug;
279 if (__get_user(line, (unsigned short *)(eip + 2))) 281 if (__get_user(line, (unsigned short __user *)(eip + 2)))
280 goto bug; 282 goto bug;
281 if (__get_user(file, (char **)(eip + 4)) || 283 if (__get_user(file, (char * __user *)(eip + 4)) ||
282 (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) 284 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
283 file = "<bad filename>"; 285 file = "<bad filename>";
284 286
@@ -293,6 +295,9 @@ bug:
293 printk("Kernel BUG\n"); 295 printk("Kernel BUG\n");
294} 296}
295 297
 298/* This path is taken when something in the kernel
 299 * has done something bad and is about to be terminated.
 300 */
296void die(const char * str, struct pt_regs * regs, long err) 301void die(const char * str, struct pt_regs * regs, long err)
297{ 302{
298 static struct { 303 static struct {
@@ -306,7 +311,7 @@ void die(const char * str, struct pt_regs * regs, long err)
306 }; 311 };
307 static int die_counter; 312 static int die_counter;
308 313
309 if (die.lock_owner != _smp_processor_id()) { 314 if (die.lock_owner != raw_smp_processor_id()) {
310 console_verbose(); 315 console_verbose();
311 spin_lock_irq(&die.lock); 316 spin_lock_irq(&die.lock);
312 die.lock_owner = smp_processor_id(); 317 die.lock_owner = smp_processor_id();
@@ -340,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err)
340 bust_spinlocks(0); 345 bust_spinlocks(0);
341 die.lock_owner = -1; 346 die.lock_owner = -1;
342 spin_unlock_irq(&die.lock); 347 spin_unlock_irq(&die.lock);
348
349 if (kexec_should_crash(current))
350 crash_kexec(regs);
351
343 if (in_interrupt()) 352 if (in_interrupt())
344 panic("Fatal exception in interrupt"); 353 panic("Fatal exception in interrupt");
345 354
@@ -353,26 +362,27 @@ void die(const char * str, struct pt_regs * regs, long err)
353 362
354static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) 363static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
355{ 364{
356 if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) 365 if (!user_mode_vm(regs))
357 die(str, regs, err); 366 die(str, regs, err);
358} 367}
359 368
360static void do_trap(int trapnr, int signr, char *str, int vm86, 369static void do_trap(int trapnr, int signr, char *str, int vm86,
361 struct pt_regs * regs, long error_code, siginfo_t *info) 370 struct pt_regs * regs, long error_code, siginfo_t *info)
362{ 371{
372 struct task_struct *tsk = current;
373 tsk->thread.error_code = error_code;
374 tsk->thread.trap_no = trapnr;
375
363 if (regs->eflags & VM_MASK) { 376 if (regs->eflags & VM_MASK) {
364 if (vm86) 377 if (vm86)
365 goto vm86_trap; 378 goto vm86_trap;
366 goto trap_signal; 379 goto trap_signal;
367 } 380 }
368 381
369 if (!(regs->xcs & 3)) 382 if (!user_mode(regs))
370 goto kernel_trap; 383 goto kernel_trap;
371 384
372 trap_signal: { 385 trap_signal: {
373 struct task_struct *tsk = current;
374 tsk->thread.error_code = error_code;
375 tsk->thread.trap_no = trapnr;
376 if (info) 386 if (info)
377 force_sig_info(signr, info, tsk); 387 force_sig_info(signr, info, tsk);
378 else 388 else
@@ -485,10 +495,13 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
485 } 495 }
486 put_cpu(); 496 put_cpu();
487 497
498 current->thread.error_code = error_code;
499 current->thread.trap_no = 13;
500
488 if (regs->eflags & VM_MASK) 501 if (regs->eflags & VM_MASK)
489 goto gp_in_vm86; 502 goto gp_in_vm86;
490 503
491 if (!(regs->xcs & 3)) 504 if (!user_mode(regs))
492 goto gp_in_kernel; 505 goto gp_in_kernel;
493 506
494 current->thread.error_code = error_code; 507 current->thread.error_code = error_code;
@@ -569,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg)
569 console_silent(); 582 console_silent();
570 spin_unlock(&nmi_print_lock); 583 spin_unlock(&nmi_print_lock);
571 bust_spinlocks(0); 584 bust_spinlocks(0);
585
 586 /* If we are in the kernel we are probably nested up pretty badly
 587 * and might as well get out now while we still can.
588 */
589 if (!user_mode(regs)) {
590 current->thread.trap_no = 2;
591 crash_kexec(regs);
592 }
593
572 do_exit(SIGSEGV); 594 do_exit(SIGSEGV);
573} 595}
574 596
@@ -624,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
624 nmi_enter(); 646 nmi_enter();
625 647
626 cpu = smp_processor_id(); 648 cpu = smp_processor_id();
649
650#ifdef CONFIG_HOTPLUG_CPU
651 if (!cpu_online(cpu)) {
652 nmi_exit();
653 return;
654 }
655#endif
656
627 ++nmi_count(cpu); 657 ++nmi_count(cpu);
628 658
629 if (!nmi_callback(regs, cpu)) 659 if (!nmi_callback(regs, cpu))
@@ -636,11 +666,13 @@ void set_nmi_callback(nmi_callback_t callback)
636{ 666{
637 nmi_callback = callback; 667 nmi_callback = callback;
638} 668}
669EXPORT_SYMBOL_GPL(set_nmi_callback);
639 670
640void unset_nmi_callback(void) 671void unset_nmi_callback(void)
641{ 672{
642 nmi_callback = dummy_nmi_callback; 673 nmi_callback = dummy_nmi_callback;
643} 674}
675EXPORT_SYMBOL_GPL(unset_nmi_callback);
644 676
645#ifdef CONFIG_KPROBES 677#ifdef CONFIG_KPROBES
646fastcall void do_int3(struct pt_regs *regs, long error_code) 678fastcall void do_int3(struct pt_regs *regs, long error_code)
@@ -682,7 +714,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
682 unsigned int condition; 714 unsigned int condition;
683 struct task_struct *tsk = current; 715 struct task_struct *tsk = current;
684 716
685 __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); 717 get_debugreg(condition, 6);
686 718
687 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 719 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
688 SIGTRAP) == NOTIFY_STOP) 720 SIGTRAP) == NOTIFY_STOP)
@@ -713,7 +745,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
713 * check for kernel mode by just checking the CPL 745 * check for kernel mode by just checking the CPL
714 * of CS. 746 * of CS.
715 */ 747 */
716 if ((regs->xcs & 3) == 0) 748 if (!user_mode(regs))
717 goto clear_TF_reenable; 749 goto clear_TF_reenable;
718 } 750 }
719 751
@@ -724,9 +756,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
724 * the signal is delivered. 756 * the signal is delivered.
725 */ 757 */
726clear_dr7: 758clear_dr7:
727 __asm__("movl %0,%%db7" 759 set_debugreg(0, 7);
728 : /* no output */
729 : "r" (0));
730 return; 760 return;
731 761
732debug_vm86: 762debug_vm86:
@@ -871,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
871 error_code); 901 error_code);
872 return; 902 return;
873 } 903 }
874 die_if_kernel("cache flush denied", regs, error_code);
875 current->thread.trap_no = 19; 904 current->thread.trap_no = 19;
876 current->thread.error_code = error_code; 905 current->thread.error_code = error_code;
906 die_if_kernel("cache flush denied", regs, error_code);
877 force_sig(SIGSEGV, current); 907 force_sig(SIGSEGV, current);
878 } 908 }
879} 909}
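
The recurring traps.c change replaces open-coded CPL tests on regs->xcs with user_mode() and user_mode_vm(). As a reading aid, here is a compilable paraphrase of the i386 predicates of this era; the macro bodies and selector values are recalled from the asm-i386 headers, not taken from this diff:

#include <stdio.h>

#define VM_MASK 0x00020000		/* EFLAGS.VM: virtual-8086 mode */

struct pt_regs { long xcs; long eflags; };	/* trimmed for the demo */

/* CPL is the low two bits of CS; 3 means the trap came from user
 * space. user_mode_vm() additionally counts vm86 tasks as "user". */
#define user_mode(regs)    (3 & (regs)->xcs)
#define user_mode_vm(regs) (((regs)->eflags & VM_MASK) || user_mode(regs))

int main(void)
{
	struct pt_regs kern = { .xcs = 0x60, .eflags = 0 };  /* __KERNEL_CS */
	struct pt_regs user = { .xcs = 0x7b, .eflags = 0 };  /* __USER_CS */

	printf("kernel mode: %d, user mode: %d\n",
	       !!user_mode(&kern), !!user_mode(&user));
	return 0;
}
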
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index e0512cc8bea7..7e01a528a83a 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -2,20 +2,23 @@
2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; 2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
3 */ 3 */
4 4
5#define LOAD_OFFSET __PAGE_OFFSET
6
5#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
6#include <asm/thread_info.h> 8#include <asm/thread_info.h>
7#include <asm/page.h> 9#include <asm/page.h>
8 10
9OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") 11OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
10OUTPUT_ARCH(i386) 12OUTPUT_ARCH(i386)
11ENTRY(startup_32) 13ENTRY(phys_startup_32)
12jiffies = jiffies_64; 14jiffies = jiffies_64;
13SECTIONS 15SECTIONS
14{ 16{
15 . = __PAGE_OFFSET + 0x100000; 17 . = __KERNEL_START;
18 phys_startup_32 = startup_32 - LOAD_OFFSET;
16 /* read-only */ 19 /* read-only */
17 _text = .; /* Text and read-only data */ 20 _text = .; /* Text and read-only data */
18 .text : { 21 .text : AT(ADDR(.text) - LOAD_OFFSET) {
19 *(.text) 22 *(.text)
20 SCHED_TEXT 23 SCHED_TEXT
21 LOCK_TEXT 24 LOCK_TEXT
@@ -27,49 +30,55 @@ SECTIONS
27 30
28 . = ALIGN(16); /* Exception table */ 31 . = ALIGN(16); /* Exception table */
29 __start___ex_table = .; 32 __start___ex_table = .;
30 __ex_table : { *(__ex_table) } 33 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
31 __stop___ex_table = .; 34 __stop___ex_table = .;
32 35
33 RODATA 36 RODATA
34 37
35 /* writeable */ 38 /* writeable */
36 .data : { /* Data */ 39 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
37 *(.data) 40 *(.data)
38 CONSTRUCTORS 41 CONSTRUCTORS
39 } 42 }
40 43
41 . = ALIGN(4096); 44 . = ALIGN(4096);
42 __nosave_begin = .; 45 __nosave_begin = .;
43 .data_nosave : { *(.data.nosave) } 46 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
44 . = ALIGN(4096); 47 . = ALIGN(4096);
45 __nosave_end = .; 48 __nosave_end = .;
46 49
47 . = ALIGN(4096); 50 . = ALIGN(4096);
48 .data.page_aligned : { *(.data.idt) } 51 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
52 *(.data.idt)
53 }
49 54
50 . = ALIGN(32); 55 . = ALIGN(32);
51 .data.cacheline_aligned : { *(.data.cacheline_aligned) } 56 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
57 *(.data.cacheline_aligned)
58 }
52 59
53 _edata = .; /* End of data section */ 60 _edata = .; /* End of data section */
54 61
55 . = ALIGN(THREAD_SIZE); /* init_task */ 62 . = ALIGN(THREAD_SIZE); /* init_task */
56 .data.init_task : { *(.data.init_task) } 63 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
64 *(.data.init_task)
65 }
57 66
58 /* will be freed after init */ 67 /* will be freed after init */
59 . = ALIGN(4096); /* Init code and data */ 68 . = ALIGN(4096); /* Init code and data */
60 __init_begin = .; 69 __init_begin = .;
61 .init.text : { 70 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
62 _sinittext = .; 71 _sinittext = .;
63 *(.init.text) 72 *(.init.text)
64 _einittext = .; 73 _einittext = .;
65 } 74 }
66 .init.data : { *(.init.data) } 75 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
67 . = ALIGN(16); 76 . = ALIGN(16);
68 __setup_start = .; 77 __setup_start = .;
69 .init.setup : { *(.init.setup) } 78 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
70 __setup_end = .; 79 __setup_end = .;
71 __initcall_start = .; 80 __initcall_start = .;
72 .initcall.init : { 81 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
73 *(.initcall1.init) 82 *(.initcall1.init)
74 *(.initcall2.init) 83 *(.initcall2.init)
75 *(.initcall3.init) 84 *(.initcall3.init)
@@ -80,33 +89,41 @@ SECTIONS
80 } 89 }
81 __initcall_end = .; 90 __initcall_end = .;
82 __con_initcall_start = .; 91 __con_initcall_start = .;
83 .con_initcall.init : { *(.con_initcall.init) } 92 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
93 *(.con_initcall.init)
94 }
84 __con_initcall_end = .; 95 __con_initcall_end = .;
85 SECURITY_INIT 96 SECURITY_INIT
86 . = ALIGN(4); 97 . = ALIGN(4);
87 __alt_instructions = .; 98 __alt_instructions = .;
88 .altinstructions : { *(.altinstructions) } 99 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
100 *(.altinstructions)
101 }
89 __alt_instructions_end = .; 102 __alt_instructions_end = .;
90 .altinstr_replacement : { *(.altinstr_replacement) } 103 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
104 *(.altinstr_replacement)
105 }
 91 /* .exit.text is discarded at runtime, not link time, to deal with references 106
92 from .altinstructions and .eh_frame */ 107 from .altinstructions and .eh_frame */
93 .exit.text : { *(.exit.text) } 108 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
94 .exit.data : { *(.exit.data) } 109 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
95 . = ALIGN(4096); 110 . = ALIGN(4096);
96 __initramfs_start = .; 111 __initramfs_start = .;
97 .init.ramfs : { *(.init.ramfs) } 112 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
98 __initramfs_end = .; 113 __initramfs_end = .;
99 . = ALIGN(32); 114 . = ALIGN(32);
100 __per_cpu_start = .; 115 __per_cpu_start = .;
101 .data.percpu : { *(.data.percpu) } 116 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
102 __per_cpu_end = .; 117 __per_cpu_end = .;
103 . = ALIGN(4096); 118 . = ALIGN(4096);
104 __init_end = .; 119 __init_end = .;
105 /* freed after init ends here */ 120 /* freed after init ends here */
106 121
107 __bss_start = .; /* BSS */ 122 __bss_start = .; /* BSS */
108 .bss : { 123 .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
109 *(.bss.page_aligned) 124 *(.bss.page_aligned)
125 }
126 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
110 *(.bss) 127 *(.bss)
111 } 128 }
112 . = ALIGN(4); 129 . = ALIGN(4);
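
Each AT(ADDR(section) - LOAD_OFFSET) added above gives a section a load-time physical address distinct from its link-time virtual address, which is what kexec needs to place the image in physical memory (hence the new ENTRY(phys_startup_32)). The arithmetic, assuming the standard 0xC0000000 __PAGE_OFFSET:

#include <stdio.h>

#define LOAD_OFFSET 0xC0000000UL	/* assumed __PAGE_OFFSET */

/* A section linked at virtual address V is loaded at V - LOAD_OFFSET. */
static unsigned long load_addr(unsigned long link_addr)
{
	return link_addr - LOAD_OFFSET;
}

int main(void)
{
	/* startup_32 links at 0xC0100000, so phys_startup_32 = 0x00100000. */
	printf("phys_startup_32 = %#lx\n", load_addr(0xC0100000UL));
	return 0;
}
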
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
index ab43394dc775..8b81b2524fa6 100644
--- a/arch/i386/lib/dec_and_lock.c
+++ b/arch/i386/lib/dec_and_lock.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/module.h>
11#include <asm/atomic.h> 12#include <asm/atomic.h>
12 13
13int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) 14int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
@@ -38,3 +39,4 @@ slow_path:
38 spin_unlock(lock); 39 spin_unlock(lock);
39 return 0; 40 return 0;
40} 41}
42EXPORT_SYMBOL(_atomic_dec_and_lock);
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index 080639f262b1..c49a6acbee56 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -13,6 +13,7 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/module.h>
16#include <asm/processor.h> 17#include <asm/processor.h>
17#include <asm/delay.h> 18#include <asm/delay.h>
18#include <asm/timer.h> 19#include <asm/timer.h>
@@ -34,7 +35,7 @@ inline void __const_udelay(unsigned long xloops)
34 xloops *= 4; 35 xloops *= 4;
35 __asm__("mull %0" 36 __asm__("mull %0"
36 :"=d" (xloops), "=&a" (d0) 37 :"=d" (xloops), "=&a" (d0)
37 :"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4))); 38 :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
38 __delay(++xloops); 39 __delay(++xloops);
39} 40}
40 41
@@ -47,3 +48,8 @@ void __ndelay(unsigned long nsecs)
47{ 48{
48 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 49 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
49} 50}
51
52EXPORT_SYMBOL(__delay);
53EXPORT_SYMBOL(__const_udelay);
54EXPORT_SYMBOL(__udelay);
55EXPORT_SYMBOL(__ndelay);
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
index 01f8b1a2cc84..2afda94dffd3 100644
--- a/arch/i386/lib/mmx.c
+++ b/arch/i386/lib/mmx.c
@@ -3,6 +3,7 @@
3#include <linux/string.h> 3#include <linux/string.h>
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/hardirq.h> 5#include <linux/hardirq.h>
6#include <linux/module.h>
6 7
7#include <asm/i387.h> 8#include <asm/i387.h>
8 9
@@ -397,3 +398,7 @@ void mmx_copy_page(void *to, void *from)
397 else 398 else
398 fast_copy_page(to, from); 399 fast_copy_page(to, from);
399} 400}
401
402EXPORT_SYMBOL(_mmx_memcpy);
403EXPORT_SYMBOL(mmx_clear_page);
404EXPORT_SYMBOL(mmx_copy_page);
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 51aa2bbb0269..4cf981d70f45 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -84,6 +84,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
 	__do_strncpy_from_user(dst, src, count, res);
 	return res;
 }
+EXPORT_SYMBOL(__strncpy_from_user);
 
 /**
  * strncpy_from_user: - Copy a NUL terminated string from userspace.
@@ -111,7 +112,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)
 	__do_strncpy_from_user(dst, src, count, res);
 	return res;
 }
-
+EXPORT_SYMBOL(strncpy_from_user);
 
 /*
  * Zero Userspace
@@ -157,6 +158,7 @@ clear_user(void __user *to, unsigned long n)
 		__do_clear_user(to, n);
 	return n;
 }
+EXPORT_SYMBOL(clear_user);
 
 /**
  * __clear_user: - Zero a block of memory in user space, with less checking.
@@ -175,6 +177,7 @@ __clear_user(void __user *to, unsigned long n)
 	__do_clear_user(to, n);
 	return n;
 }
+EXPORT_SYMBOL(__clear_user);
 
 /**
  * strlen_user: - Get the size of a string in user space.
@@ -218,6 +221,7 @@ long strnlen_user(const char __user *s, long n)
 		:"cc");
 	return res & mask;
 }
+EXPORT_SYMBOL(strnlen_user);
 
 #ifdef CONFIG_X86_INTEL_USERCOPY
 static unsigned long
@@ -570,6 +574,7 @@ survive:
 		n = __copy_user_intel(to, from, n);
 	return n;
 }
+EXPORT_SYMBOL(__copy_to_user_ll);
 
 unsigned long
 __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
@@ -581,6 +586,7 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
 		n = __copy_user_zeroing_intel(to, from, n);
 	return n;
 }
+EXPORT_SYMBOL(__copy_from_user_ll);
 
 /**
  * copy_to_user: - Copy a block of data into user space.
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
index 0aa08eaa8932..e5a1a83d09ef 100644
--- a/arch/i386/mach-default/setup.c
+++ b/arch/i386/mach-default/setup.c
@@ -10,6 +10,14 @@
 #include <asm/acpi.h>
 #include <asm/arch_hooks.h>
 
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI	(1)
+#else
+#define DEFAULT_SEND_IPI	(0)
+#endif
+
+int no_broadcast=DEFAULT_SEND_IPI;
+
 /**
  * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
  *
@@ -104,3 +112,22 @@ void __init mca_nmi_hook(void)
 	printk("NMI generated from unknown source!\n");
 }
 #endif
+
+static __init int no_ipi_broadcast(char *str)
+{
+	get_option(&str, &no_broadcast);
+	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
+							"IPI Broadcast");
+	return 1;
+}
+
+__setup("no_ipi_broadcast", no_ipi_broadcast);
+
+static int __init print_ipi_mode(void)
+{
+	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
+							"Shortcut");
+	return 0;
+}
+
+late_initcall(print_ipi_mode);
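
The consumer of no_broadcast lives in include/asm-i386/mach-default/mach_ipi.h and is outside the arch/i386 diffstat shown here, so the sketch below is an assumed reconstruction of its shape, not a quote of the patch. The point of the flag: with broadcast disabled, the IPI destination mask is built from cpu_online_map, so a hot-removed CPU can never receive a stale APIC shortcut broadcast:

    /* Assumed shape of the mach_ipi.h consumer; helper names follow the
     * mach-default conventions of this tree. */
    extern int no_broadcast;

    static inline void local_send_IPI_allbutself(int vector)
    {
        if (no_broadcast) {
            cpumask_t mask = cpu_online_map;

            cpu_clear(smp_processor_id(), mask);
            send_IPI_mask(mask, vector);           /* one IPI per online CPU */
        } else
            __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); /* APIC broadcast */
    }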
diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c
index 5b3e8817dae8..23395fff35d1 100644
--- a/arch/i386/mach-default/topology.c
+++ b/arch/i386/mach-default/topology.c
@@ -73,12 +73,11 @@ static int __init topology_init(void)
 {
 	int i;
 
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (node_online(i))
-			arch_register_node(i);
-	}
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_possible(i)) arch_register_cpu(i);
+	for_each_online_node(i)
+		arch_register_node(i);
+
+	for_each_cpu(i)
+		arch_register_cpu(i);
 	return 0;
 }
 
@@ -88,8 +87,8 @@ static int __init topology_init(void)
 {
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_possible(i)) arch_register_cpu(i);
+	for_each_cpu(i)
+		arch_register_cpu(i);
 	return 0;
 }
 
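
Both loops now go through the generic iterator macros instead of open-coding the bounds and the online/possible tests. A simplified sketch of what they expand to; the real definitions live in linux/nodemask.h and linux/cpumask.h, and the exact forms are approximated here:

    /* Approximate expansions -- both are bitmap walks, so offline nodes and
     * impossible CPUs are skipped without an explicit test in the caller. */
    #define for_each_online_node(nid)  for_each_node_mask((nid), node_online_map)
    #define for_each_cpu(cpu)          for_each_cpu_mask((cpu), cpu_possible_map)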
diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c
index 5a22082147f4..5f3d7e6de37b 100644
--- a/arch/i386/mach-visws/mpparse.c
+++ b/arch/i386/mach-visws/mpparse.c
@@ -23,7 +23,6 @@ unsigned long mp_lapic_addr;
 
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -52,10 +51,8 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 		(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
 		m->mpc_apicver);
 
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
 		boot_cpu_physical_apicid = m->mpc_apicid;
-		boot_cpu_logical_apicid = logical_apicid;
-	}
 
 	ver = m->mpc_apicver;
 	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 602aea240e9b..3e439ce5e1b2 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -30,6 +30,7 @@
 #include <linux/irq.h>
 #include <asm/tlbflush.h>
 #include <asm/arch_hooks.h>
+#include <asm/i8253.h>
 
 /*
  * Power off function, if any
@@ -182,7 +183,6 @@ voyager_timer_interrupt(struct pt_regs *regs)
 	 * and swiftly introduce it to something sharp and
 	 * pointy. */
 	__u16 val;
-	extern spinlock_t i8253_lock;
 
 	spin_lock(&i8253_lock);
 
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index a6e0ddd65bd0..8c8527593da0 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -1288,7 +1288,7 @@ smp_local_timer_interrupt(struct pt_regs * regs)
 				per_cpu(prof_counter, cpu);
 	}
 
-	update_process_times(user_mode(regs));
+	update_process_times(user_mode_vm(regs));
 	}
 
 	if( ((1<<cpu) & voyager_extended_vic_processors) == 0)
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
index fc3272506846..80908b5aa60f 100644
--- a/arch/i386/mm/Makefile
+++ b/arch/i386/mm/Makefile
@@ -4,7 +4,7 @@
 
 obj-y	:= init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o
 
-obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
+obj-$(CONFIG_NUMA)		+= discontig.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 1726b4096b10..b358f0702a44 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -29,12 +29,16 @@
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/nodemask.h>
+#include <linux/module.h>
+#include <linux/kexec.h>
+
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/mmzone.h>
 #include <bios_ebda.h>
 
 struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
 bootmem_data_t node0_bdata;
 
 /*
@@ -42,12 +46,16 @@ bootmem_data_t node0_bdata;
  * populated the following initialisation.
  *
  * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) physnode_map     - the mapping between a pfn and owning node
- * 3) node_start_pfn   - the starting page frame number for a node
+ * 2) node_start_pfn   - the starting page frame number for a node
  * 3) node_end_pfn     - the ending page fram number for a node
  */
+unsigned long node_start_pfn[MAX_NUMNODES];
+unsigned long node_end_pfn[MAX_NUMNODES];
 
+
+#ifdef CONFIG_DISCONTIGMEM
 /*
+ * 4) physnode_map     - the mapping between a pfn and owning node
  * physnode_map keeps track of the physical memory layout of a generic
  * numa node on a 256Mb break (each element of the array will
  * represent 256Mb of memory and will be marked by the node id.  so,
@@ -59,6 +67,7 @@ bootmem_data_t node0_bdata;
  *     physnode_map[8- ] = -1;
  */
 s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+EXPORT_SYMBOL(physnode_map);
 
 void memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -85,9 +94,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 	return (nr_pages + 1) * sizeof(struct page);
 }
-
-unsigned long node_start_pfn[MAX_NUMNODES];
-unsigned long node_end_pfn[MAX_NUMNODES];
+#endif
 
 extern unsigned long find_max_low_pfn(void);
 extern void find_max_pfn(void);
@@ -108,6 +115,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
 void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
+void *node_remap_end_vaddr[MAX_NUMNODES];
+void *node_remap_alloc_vaddr[MAX_NUMNODES];
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
@@ -146,6 +156,21 @@ static void __init find_max_pfn_node(int nid)
 		BUG();
 }
 
+/* Find the owning node for a pfn. */
+int early_pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	for_each_node(nid) {
+		if (node_end_pfn[nid] == 0)
+			break;
+		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
+			return nid;
+	}
+
+	return 0;
+}
+
 /*
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -163,6 +188,21 @@ static void __init allocate_pgdat(int nid)
 	}
 }
 
+void *alloc_remap(int nid, unsigned long size)
+{
+	void *allocation = node_remap_alloc_vaddr[nid];
+
+	size = ALIGN(size, L1_CACHE_BYTES);
+
+	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+		return 0;
+
+	node_remap_alloc_vaddr[nid] += size;
+	memset(allocation, 0, size);
+
+	return allocation;
+}
+
 void __init remap_numa_kva(void)
 {
 	void *vaddr;
@@ -170,8 +210,6 @@ void __init remap_numa_kva(void)
 	int node;
 
 	for_each_online_node(node) {
-		if (node == 0)
-			continue;
 		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
 			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
 			set_pmd_pfn((ulong) vaddr,
@@ -185,13 +223,9 @@ static unsigned long calculate_numa_remap_pages(void)
 {
 	int nid;
 	unsigned long size, reserve_pages = 0;
+	unsigned long pfn;
 
 	for_each_online_node(nid) {
-		if (nid == 0)
-			continue;
-		if (!node_remap_size[nid])
-			continue;
-
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -208,11 +242,24 @@ static unsigned long calculate_numa_remap_pages(void)
 		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
 		/* now the roundup is correct, convert to PAGE_SIZE pages */
 		size = size * PTRS_PER_PTE;
+
+		/*
+		 * Validate the region we are allocating only contains valid
+		 * pages.
+		 */
+		for (pfn = node_end_pfn[nid] - size;
+		     pfn < node_end_pfn[nid]; pfn++)
+			if (!page_is_ram(pfn))
+				break;
+
+		if (pfn != node_end_pfn[nid])
+			size = 0;
+
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
-		reserve_pages += size;
 		node_remap_offset[nid] = reserve_pages;
+		reserve_pages += size;
 		printk("Shrinking node %d from %ld pages to %ld pages\n",
 			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
 		node_end_pfn[nid] -= size;
@@ -265,12 +312,18 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
+				highstart_pfn + node_remap_offset[nid]);
+		/* Init the node remap allocator */
+		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+			(node_remap_size[nid] * PAGE_SIZE);
+		node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
-			    - node_remap_offset[nid] + node_remap_size[nid]));
+			(ulong) pfn_to_kaddr(highstart_pfn
+			   + node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
@@ -333,23 +386,9 @@ void __init zone_sizes_init(void)
 	}
 
 		zholes_size = get_zholes_size(nid);
-		/*
-		 * We let the lmem_map for node 0 be allocated from the
-		 * normal bootmem allocator, but other nodes come from the
-		 * remapped KVA area - mbligh
-		 */
-		if (!nid)
-			free_area_init_node(nid, NODE_DATA(nid),
-					zones_size, start, zholes_size);
-		else {
-			unsigned long lmem_map;
-			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
-			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
-			lmem_map &= PAGE_MASK;
-			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
-			free_area_init_node(nid, NODE_DATA(nid), zones_size,
-				start, zholes_size);
-		}
+
+		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
+				zholes_size);
 	}
 	return;
 }
@@ -358,24 +397,26 @@ void __init set_highmem_pages_init(int bad_ppro)
 {
 #ifdef CONFIG_HIGHMEM
 	struct zone *zone;
+	struct page *page;
 
 	for_each_zone(zone) {
-		unsigned long node_pfn, node_high_size, zone_start_pfn;
-		struct page * zone_mem_map;
-
+		unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
+
 		if (!is_highmem(zone))
 			continue;
 
-		printk("Initializing %s for node %d\n", zone->name,
-			zone->zone_pgdat->node_id);
-
-		node_high_size = zone->spanned_pages;
-		zone_mem_map = zone->zone_mem_map;
 		zone_start_pfn = zone->zone_start_pfn;
+		zone_end_pfn = zone_start_pfn + zone->spanned_pages;
+
+		printk("Initializing %s for node %d (%08lx:%08lx)\n",
+				zone->name, zone->zone_pgdat->node_id,
+				zone_start_pfn, zone_end_pfn);
 
-		for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) {
-			one_highpage_init((struct page *)(zone_mem_map + node_pfn),
-					  zone_start_pfn + node_pfn, bad_ppro);
+		for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
+			if (!pfn_valid(node_pfn))
+				continue;
+			page = pfn_to_page(node_pfn);
+			one_highpage_init(page, node_pfn, bad_ppro);
 		}
 	}
 	totalram_pages += totalhigh_pages;
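
The alloc_remap() added above is a simple bump allocator over each node's remapped KVA window: it hands out cache-line-aligned, zeroed chunks until node_remap_alloc_vaddr reaches node_remap_end_vaddr, then returns 0. A hedged sketch of how a caller would combine it with the normal bootmem fallback; the caller shown is illustrative, not part of this patch:

    static void *example_node_alloc(int nid, unsigned long size)
    {
        /* Fast path: carve from the node's remapped KVA window. */
        void *ptr = alloc_remap(nid, size);

        if (!ptr)   /* window exhausted or not set up for this node */
            ptr = alloc_bootmem_node(NODE_DATA(nid), size);
        return ptr;
    }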
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index a509237c4815..8e90339d6eaa 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -146,7 +146,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 
 		if (instr > limit)
 			break;
-		if (__get_user(opcode, (unsigned char *) instr))
+		if (__get_user(opcode, (unsigned char __user *) instr))
 			break;
 
 		instr_hi = opcode & 0xf0;
@@ -173,7 +173,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 			scan_more = 0;
 			if (instr > limit)
 				break;
-			if (__get_user(opcode, (unsigned char *) instr))
+			if (__get_user(opcode, (unsigned char __user *) instr))
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
@@ -463,6 +463,9 @@ no_context:
 		printk(KERN_ALERT "*pte = %08lx\n", page);
 	}
 #endif
+	tsk->thread.cr2 = address;
+	tsk->thread.trap_no = 14;
+	tsk->thread.error_code = error_code;
 	die("Oops", regs, error_code);
 	bust_spinlocks(0);
 	do_exit(SIGKILL);
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index fc4c4cad4e98..b6eb4dcb8777 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -1,4 +1,5 @@
 #include <linux/highmem.h>
+#include <linux/module.h>
 
 void *kmap(struct page *page)
 {
@@ -74,6 +75,24 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	preempt_check_resched();
 }
 
+/* This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	inc_preempt_count();
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
 struct page *kmap_atomic_to_page(void *ptr)
 {
 	unsigned long idx, vaddr = (unsigned long)ptr;
@@ -87,3 +106,8 @@ struct page *kmap_atomic_to_page(void *ptr)
 	return pte_page(*pte);
 }
 
+EXPORT_SYMBOL(kmap);
+EXPORT_SYMBOL(kunmap);
+EXPORT_SYMBOL(kmap_atomic);
+EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_to_page);
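
kmap_atomic_pfn() mirrors kmap_atomic() but takes a raw pfn, which matters for memory that has no struct page; the kexec/crash paths added elsewhere in this series are the likely users. A hedged usage sketch with an illustrative destination pfn and slot type:

    static void example_copy_to_pfn(unsigned long dest_pfn, void *src)
    {
        /* KM_PTE0 is just an example slot; any km_type works as long as
         * map and unmap agree and the section stays atomic. */
        void *vaddr = kmap_atomic_pfn(dest_pfn, KM_PTE0);

        memcpy(vaddr, src, PAGE_SIZE);
        kunmap_atomic(vaddr, KM_PTE0);
    }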
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 171fc925e1e4..3b099f32b948 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -18,7 +18,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -30,7 +30,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return (pte_t *) pmd;
 }
 
-static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -42,21 +42,6 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return (pte_t *) pmd;
 }
 
-static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
-{
-	pte_t entry;
-
-	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
-	if (write_access) {
-		entry =
-		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-	} else
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
-	entry = pte_mkyoung(entry);
-	mk_pte_huge(entry);
-	set_pte(page_table, entry);
-}
-
 /*
  * This function checks for proper alignment of input addr and len parameters.
  */
@@ -69,77 +54,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma)
-{
-	pte_t *src_pte, *dst_pte, entry;
-	struct page *ptepage;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-
-	while (addr < end) {
-		dst_pte = huge_pte_alloc(dst, addr);
-		if (!dst_pte)
-			goto nomem;
-		src_pte = huge_pte_offset(src, addr);
-		entry = *src_pte;
-		ptepage = pte_page(entry);
-		get_page(ptepage);
-		set_pte(dst_pte, entry);
-		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-		addr += HPAGE_SIZE;
-	}
-	return 0;
-
-nomem:
-	return -ENOMEM;
-}
-
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		    struct page **pages, struct vm_area_struct **vmas,
-		    unsigned long *position, int *length, int i)
-{
-	unsigned long vpfn, vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	vpfn = vaddr/PAGE_SIZE;
-	while (vaddr < vma->vm_end && remainder) {
-
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || pte_none(*pte));
-
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		++vpfn;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 #if 0	/* This is just for testing */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -204,83 +118,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 }
 #endif
 
-void unmap_hugepage_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
+void hugetlb_clean_stale_pgtable(pte_t *pte)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
-	pte_t pte, *ptep;
+	pmd_t *pmd = (pmd_t *) pte;
 	struct page *page;
 
-	BUG_ON(start & (HPAGE_SIZE - 1));
-	BUG_ON(end & (HPAGE_SIZE - 1));
-
-	for (address = start; address < end; address += HPAGE_SIZE) {
-		ptep = huge_pte_offset(mm, address);
-		if (!ptep)
-			continue;
-		pte = ptep_get_and_clear(mm, address, ptep);
-		if (pte_none(pte))
-			continue;
-		page = pte_page(pte);
-		put_page(page);
-	}
-	add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT));
-	flush_tlb_range(vma, start, end);
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr;
-	int ret = 0;
-
-	BUG_ON(vma->vm_start & ~HPAGE_MASK);
-	BUG_ON(vma->vm_end & ~HPAGE_MASK);
-
-	spin_lock(&mm->page_table_lock);
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
-		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
-
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		if (!pte_none(*pte))
-			continue;
-
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = find_get_page(mapping, idx);
-		if (!page) {
-			/* charge the fs quota first */
-			if (hugetlb_get_quota(mapping)) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			page = alloc_huge_page();
-			if (!page) {
-				hugetlb_put_quota(mapping);
-				ret = -ENOMEM;
-				goto out;
-			}
-			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-			if (! ret) {
-				unlock_page(page);
-			} else {
-				hugetlb_put_quota(mapping);
-				free_huge_page(page);
-				goto out;
-			}
-		}
-		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
-	}
-out:
-	spin_unlock(&mm->page_table_lock);
-	return ret;
+	page = pmd_page(*pmd);
+	pmd_clear(pmd);
+	dec_page_state(nr_page_table_pages);
+	page_cache_release(page);
 }
 
 /* x86_64 also uses this file */
@@ -294,7 +140,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 
-	start_addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = mm->free_area_cache;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
@@ -308,6 +159,7 @@ full_search:
 			 */
 			if (start_addr != TASK_UNMAPPED_BASE) {
 				start_addr = TASK_UNMAPPED_BASE;
+				mm->cached_hole_size = 0;
 				goto full_search;
 			}
 			return -ENOMEM;
@@ -316,6 +168,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
 	}
 }
@@ -327,12 +181,17 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev_vma;
 	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
 	int first_time = 1;
 
 	/* don't allow allocations above current base */
 	if (mm->free_area_cache > base)
 		mm->free_area_cache = base;
 
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache = base;
+	}
try_again:
 	/* make sure it can fit in the remaining address space */
 	if (mm->free_area_cache < len)
@@ -353,13 +212,21 @@ try_again:
 	 * vma->vm_start, use it:
 	 */
 	if (addr + len <= vma->vm_start &&
-			(!prev_vma || (addr >= prev_vma->vm_end)))
+			(!prev_vma || (addr >= prev_vma->vm_end))) {
 		/* remember the address as a hint for next time */
-		return (mm->free_area_cache = addr);
-	else
+		mm->cached_hole_size = largest_hole;
+		return (mm->free_area_cache = addr);
+	} else {
 		/* pull free_area_cache down to the first hole */
-		if (mm->free_area_cache == vma->vm_end)
+		if (mm->free_area_cache == vma->vm_end) {
 			mm->free_area_cache = vma->vm_start;
+			mm->cached_hole_size = largest_hole;
+		}
+	}
+
+	/* remember the largest hole we saw so far */
+	if (addr + largest_hole < vma->vm_start)
+		largest_hole = vma->vm_start - addr;
 
 	/* try just below the current vma->vm_start */
 	addr = (vma->vm_start - len) & HPAGE_MASK;
@@ -372,6 +239,7 @@ fail:
 	 */
 	if (first_time) {
 		mm->free_area_cache = base;
+		largest_hole = 0;
 		first_time = 0;
 		goto try_again;
 	}
@@ -382,6 +250,7 @@ fail:
 	 * allocations.
 	 */
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
 	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
 			len, pgoff, flags);
 
@@ -389,6 +258,7 @@ fail:
 	 * Restore the topdown base:
 	 */
 	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
 
 	return addr;
 }
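
The cached_hole_size bookkeeping threaded through both allocators above maintains one invariant: cached_hole_size is the largest free gap known to exist below free_area_cache. A request larger than that gap can safely start scanning at the cache, since nothing below it fits; a smaller request restarts from the bottom so skipped holes get reused. A condensed sketch of that decision, using only the fields this patch touches:

    static unsigned long example_pick_scan_start(struct mm_struct *mm,
                                                 unsigned long len)
    {
        if (len > mm->cached_hole_size)
            return mm->free_area_cache;   /* no hole below can fit this */

        /* Some hole below might fit: rescan from the bottom, re-measure. */
        mm->cached_hole_size = 0;
        return TASK_UNMAPPED_BASE;
    }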
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 7a7ea3737265..12216b52e28b 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -191,7 +191,7 @@ static inline int page_kills_ppro(unsigned long pagenr)
 
 extern int is_available_memory(efi_memory_desc_t *);
 
-static inline int page_is_ram(unsigned long pagenr)
+int page_is_ram(unsigned long pagenr)
 {
 	int i;
 	unsigned long addr, end;
@@ -269,7 +269,6 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		set_bit(PG_highmem, &page->flags);
 		set_page_count(page, 1);
 		__free_page(page);
 		totalhigh_pages++;
@@ -277,7 +276,9 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
 		SetPageReserved(page);
 }
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
+extern void set_highmem_pages_init(int);
+#else
 static void __init set_highmem_pages_init(int bad_ppro)
 {
 	int pfn;
@@ -285,9 +286,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
 		one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
 	totalram_pages += totalhigh_pages;
 }
-#else
-extern void set_highmem_pages_init(int);
-#endif /* !CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_FLATMEM */
 
 #else
 #define kmap_init() do { } while (0)
@@ -296,12 +295,13 @@ extern void set_highmem_pages_init(int);
 #endif /* CONFIG_HIGHMEM */
 
 unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+EXPORT_SYMBOL(__PAGE_KERNEL);
 unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
-#ifndef CONFIG_DISCONTIGMEM
-#define remap_numa_kva() do {} while (0)
-#else
+#ifdef CONFIG_NUMA
 extern void __init remap_numa_kva(void);
+#else
+#define remap_numa_kva() do {} while (0)
 #endif
 
 static void __init pagetable_init (void)
@@ -352,7 +352,7 @@ static void __init pagetable_init (void)
 #endif
 }
 
-#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_SOFTWARE_SUSPEND
 /*
  * Swap suspend & friends need this for resume because things like the intel-agp
  * driver might have split up a kernel 4MB mapping.
@@ -526,7 +526,7 @@ static void __init set_max_mapnr_init(void)
 #else
 	num_physpages = max_low_pfn;
 #endif
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
 	max_mapnr = num_physpages;
 #endif
 }
@@ -540,7 +540,7 @@ void __init mem_init(void)
 	int tmp;
 	int bad_ppro;
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
 	if (!mem_map)
 		BUG();
 #endif
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index ab542792b27b..6b25afc933b6 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -11,6 +11,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
@@ -165,7 +166,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	}
 	return (void __iomem *) (offset + (char __iomem *)addr);
 }
-
+EXPORT_SYMBOL(__ioremap);
 
 /**
  * ioremap_nocache     -   map bus memory into CPU space
@@ -222,6 +223,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 
 	return p;
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 void iounmap(volatile void __iomem *addr)
 {
@@ -241,7 +243,7 @@ void iounmap(volatile void __iomem *addr)
 	write_lock(&vmlist_lock);
 	p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
 	if (!p) {
-		printk("iounmap: bad address %p\n", addr);
+		printk(KERN_WARNING "iounmap: bad address %p\n", addr);
 		goto out_unlock;
 	}
 
@@ -255,6 +257,7 @@ out_unlock:
 	write_unlock(&vmlist_lock);
 	kfree(p);
 }
+EXPORT_SYMBOL(iounmap);
 
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
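
The exports above are what let an MMIO driver built as a module map and unmap its register window. A minimal sketch; the base address and register offset are fabricated for illustration:

    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/errno.h>
    #include <asm/io.h>

    #define EXAMPLE_MMIO_BASE  0xfebf0000UL   /* fabricated BAR address */
    #define EXAMPLE_MMIO_SIZE  0x1000UL

    static void __iomem *example_regs;

    static int __init example_init(void)
    {
        example_regs = ioremap_nocache(EXAMPLE_MMIO_BASE, EXAMPLE_MMIO_SIZE);
        if (!example_regs)
            return -ENOMEM;
        writel(1, example_regs + 0x04);   /* fabricated enable register */
        return 0;
    }

    static void __exit example_exit(void)
    {
        iounmap(example_regs);
    }

    module_init(example_init);
    module_exit(example_exit);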
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index dd81479ff88a..bd2f7afc7a2a 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -30,13 +30,14 @@ void show_mem(void)
 	struct page *page;
 	pg_data_t *pgdat;
 	unsigned long i;
+	struct page_state ps;
 
-	printk("Mem-info:\n");
+	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-			page = pgdat->node_mem_map + i;
+			page = pgdat_page_nr(pgdat, i);
 			total++;
 			if (PageHighMem(page))
 				highmem++;
@@ -48,11 +49,18 @@ void show_mem(void)
 				shared += page_count(page) - 1;
 		}
 	}
-	printk("%d pages of RAM\n", total);
-	printk("%d pages of HIGHMEM\n",highmem);
-	printk("%d reserved pages\n",reserved);
-	printk("%d pages shared\n",shared);
-	printk("%d pages swap cached\n",cached);
+	printk(KERN_INFO "%d pages of RAM\n", total);
+	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
+	printk(KERN_INFO "%d reserved pages\n", reserved);
+	printk(KERN_INFO "%d pages shared\n", shared);
+	printk(KERN_INFO "%d pages swap cached\n", cached);
+
+	get_page_state(&ps);
+	printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty);
+	printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback);
+	printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
+	printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
+	printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
 }
 
 /*
@@ -105,16 +113,16 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 	pmd_t *pmd;
 
 	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
-		printk ("set_pmd_pfn: vaddr misaligned\n");
+		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
 		return; /* BUG(); */
 	}
 	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
-		printk ("set_pmd_pfn: pfn misaligned\n");
+		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
 		return; /* BUG(); */
 	}
 	pgd = swapper_pg_dir + pgd_index(vaddr);
 	if (pgd_none(*pgd)) {
-		printk ("set_pmd_pfn: pgd_none\n");
+		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
 		return; /* BUG(); */
 	}
 	pud = pud_offset(pgd, vaddr);
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
index 52d72e074f7f..65dfd2edb671 100644
--- a/arch/i386/oprofile/backtrace.c
+++ b/arch/i386/oprofile/backtrace.c
@@ -91,7 +91,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 	head = (struct frame_head *)regs->ebp;
 #endif
 
-	if (!user_mode(regs)) {
+	if (!user_mode_vm(regs)) {
 		while (depth-- && valid_kernel_stack(head, regs))
 			head = dump_backtrace(head);
 		return;
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 720975e1af50..87325263cd4f 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -25,7 +25,8 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 
 int pci_routeirq;
 int pcibios_last_bus = -1;
-struct pci_bus *pci_root_bus = NULL;
+unsigned long pirq_table_addr;
+struct pci_bus *pci_root_bus;
 struct pci_raw_ops *raw_pci_ops;
 
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
@@ -133,7 +134,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 
 	printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
 
-	return pci_scan_bus(busnum, &pci_root_ops, NULL);
+	return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
 }
 
 extern u8 pci_cache_line_size;
@@ -188,6 +189,9 @@ char * __devinit pcibios_setup(char *str)
 	} else if (!strcmp(str, "biosirq")) {
 		pci_probe |= PCI_BIOS_IRQ_SCAN;
 		return NULL;
+	} else if (!strncmp(str, "pirqaddr=", 9)) {
+		pirq_table_addr = simple_strtoul(str+9, NULL, 0);
+		return NULL;
 	}
 #endif
 #ifdef CONFIG_PCI_DIRECT
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index da21b1d07c15..78ca1ecbb907 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -58,6 +58,35 @@ struct irq_router_handler {
 int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
 
 /*
+ *  Check passed address for the PCI IRQ Routing Table signature
+ *  and perform checksum verification.
+ */
+
+static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr)
+{
+	struct irq_routing_table *rt;
+	int i;
+	u8 sum;
+
+	rt = (struct irq_routing_table *) addr;
+	if (rt->signature != PIRQ_SIGNATURE ||
+	    rt->version != PIRQ_VERSION ||
+	    rt->size % 16 ||
+	    rt->size < sizeof(struct irq_routing_table))
+		return NULL;
+	sum = 0;
+	for (i=0; i < rt->size; i++)
+		sum += addr[i];
+	if (!sum) {
+		DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+		return rt;
+	}
+	return NULL;
+}
+
+
+
+/*
  *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
  */
 
@@ -65,23 +94,17 @@ static struct irq_routing_table * __init pirq_find_routing_table(void)
 {
 	u8 *addr;
 	struct irq_routing_table *rt;
-	int i;
-	u8 sum;
 
+	if (pirq_table_addr) {
+		rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr));
+		if (rt)
+			return rt;
+		printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
+	}
 	for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
-		rt = (struct irq_routing_table *) addr;
-		if (rt->signature != PIRQ_SIGNATURE ||
-		    rt->version != PIRQ_VERSION ||
-		    rt->size % 16 ||
-		    rt->size < sizeof(struct irq_routing_table))
-			continue;
-		sum = 0;
-		for(i=0; i<rt->size; i++)
-			sum += addr[i];
-		if (!sum) {
-			DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+		rt = pirq_check_routing_table(addr);
+		if (rt)
 			return rt;
-		}
 	}
 	return NULL;
 }
@@ -227,6 +250,24 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
 }
 
 /*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ * However, for 82C586, nibble map is different .
+ */
+static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+	return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
+}
+
+static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+	write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
+	return 1;
+}
+
+/*
  * ITE 8330G pirq rules are nibble-based
  * FIXME: pirqmap may be { 1, 0, 3, 2 },
  *        2+3 are both mapped to irq 9 on my system
@@ -512,6 +553,10 @@ static __init int via_router_probe(struct irq_router *r, struct pci_dev *router,
 	switch(device)
 	{
 		case PCI_DEVICE_ID_VIA_82C586_0:
+			r->name = "VIA";
+			r->get = pirq_via586_get;
+			r->set = pirq_via586_set;
+			return 1;
 		case PCI_DEVICE_ID_VIA_82C596:
 		case PCI_DEVICE_ID_VIA_82C686:
 		case PCI_DEVICE_ID_VIA_8231:
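
pirq_check_routing_table() accepts a candidate only when the $PIR signature, version, and size checks pass and every byte of the table sums to zero mod 256, checksum byte included. A standalone userspace illustration of that checksum rule, with fabricated table contents:

    #include <stdio.h>
    #include <stdint.h>

    static int pirq_sum_ok(const uint8_t *tbl, unsigned int size)
    {
        uint8_t sum = 0;
        unsigned int i;

        for (i = 0; i < size; i++)
            sum += tbl[i];     /* wraps mod 256, like the kernel's u8 */
        return sum == 0;
    }

    int main(void)
    {
        uint8_t tbl[32] = { '$', 'P', 'I', 'R' };   /* rest zeroed */
        uint8_t sum = 0;
        unsigned int i;

        for (i = 0; i < sizeof(tbl) - 1; i++)
            sum += tbl[i];
        tbl[31] = (uint8_t)(0u - sum);   /* fix up the checksum byte */

        printf("checksum %s\n", pirq_sum_ok(tbl, sizeof(tbl)) ? "ok" : "bad");
        return 0;
    }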
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
index 1492e3753869..149a9588c256 100644
--- a/arch/i386/pci/legacy.c
+++ b/arch/i386/pci/legacy.c
@@ -45,6 +45,8 @@ static int __init pci_legacy_init(void)
 
 	printk("PCI: Probing PCI hardware\n");
 	pci_root_bus = pcibios_scan_root(0);
+	if (pci_root_bus)
+		pci_bus_add_devices(pci_root_bus);
 
 	pcibios_fixup_peer_bridges();
 
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 021a50aa51f4..60f0e7a1162a 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -11,11 +11,9 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include "pci.h"
 
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-u32 pci_mmcfg_base_addr;
-
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
 
 /* The base address of the last MMCONFIG device accessed */
@@ -24,10 +22,31 @@ static u32 mmcfg_last_accessed_device;
 /*
  * Functions for accessing PCI configuration space with MMCONFIG accesses
  */
+static u32 get_base_addr(unsigned int seg, int bus)
+{
+	int cfg_num = -1;
+	struct acpi_table_mcfg_config *cfg;
+
+	while (1) {
+		++cfg_num;
+		if (cfg_num >= pci_mmcfg_config_num) {
+			/* something bad is going on, no cfg table is found. */
+			/* so we fall back to the old way we used to do this */
+			/* and just rely on the first entry to be correct. */
+			return pci_mmcfg_config[0].base_address;
+		}
+		cfg = &pci_mmcfg_config[cfg_num];
+		if (cfg->pci_segment_group_number != seg)
+			continue;
+		if ((cfg->start_bus_number <= bus) &&
+		    (cfg->end_bus_number >= bus))
+			return cfg->base_address;
+	}
+}
 
-static inline void pci_exp_set_dev_base(int bus, int devfn)
+static inline void pci_exp_set_dev_base(unsigned int seg, int bus, int devfn)
 {
-	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	u32 dev_base = get_base_addr(seg, bus) | (bus << 20) | (devfn << 12);
 	if (dev_base != mmcfg_last_accessed_device) {
 		mmcfg_last_accessed_device = dev_base;
 		set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
@@ -44,7 +63,7 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	pci_exp_set_dev_base(bus, devfn);
+	pci_exp_set_dev_base(seg, bus, devfn);
 
 	switch (len) {
 	case 1:
@@ -73,7 +92,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	pci_exp_set_dev_base(bus, devfn);
+	pci_exp_set_dev_base(seg, bus, devfn);
 
 	switch (len) {
 	case 1:
@@ -101,7 +120,11 @@ static int __init pci_mmcfg_init(void)
 {
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		goto out;
-	if (!pci_mmcfg_base_addr)
+
+	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if ((pci_mmcfg_config_num == 0) ||
+	    (pci_mmcfg_config == NULL) ||
+	    (pci_mmcfg_config[0].base_address == 0))
 		goto out;
 
 	/* Kludge for now. Don't use mmconfig on AMD systems because
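
get_base_addr() plus the shifts in pci_exp_set_dev_base() implement the ECAM layout: each PCI function owns a 4 KB configuration window at base | (bus << 20) | (devfn << 12). A worked standalone example with a fabricated base address:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t base  = 0xe0000000;          /* fabricated MCFG base */
        uint32_t bus   = 2, dev = 3, fn = 1;
        uint32_t devfn = (dev << 3) | fn;     /* PCI_DEVFN(3, 1) == 0x19 */
        uint32_t window = base | (bus << 20) | (devfn << 12);

        /* 0xe0219000: start of the 4 KB config window for 02:03.1 */
        printf("config window for %02x:%02x.%x at 0x%08x\n",
               bus, dev, fn, window);
        return 0;
    }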
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
index 9e3695461899..adbe17a38f6f 100644
--- a/arch/i386/pci/numa.c
+++ b/arch/i386/pci/numa.c
@@ -115,6 +115,8 @@ static int __init pci_numa_init(void)
 		return 0;
 
 	pci_root_bus = pcibios_scan_root(0);
+	if (pci_root_bus)
+		pci_bus_add_devices(pci_root_bus);
 	if (num_online_nodes() > 1)
 		for_each_online_node(quad) {
 			if (quad == 0)
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 141421b673b0..b9d65f0bc2d1 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -4,6 +4,7 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include "pci.h"
 #include "pci-functions.h"
 
@@ -456,7 +457,7 @@ struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
 	free_page(page);
 	return rt;
 }
-
+EXPORT_SYMBOL(pcibios_get_irq_routing_table);
 
 int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
 {
@@ -473,6 +474,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
 		  "S" (&pci_indirect));
 	return !(ret & 0xff00);
 }
+EXPORT_SYMBOL(pcibios_set_irq_routing);
 
 static int __init pci_pcbios_init(void)
 {
478{ 480{
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index a8fc80ca69f3..a80f0f55ff51 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -27,6 +27,7 @@
 #define PCI_ASSIGN_ALL_BUSSES	0x4000
 
 extern unsigned int pci_probe;
+extern unsigned long pirq_table_addr;
 
 /* pci-i386.c */
 
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index cf337c673d92..0e6b45b61251 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -22,9 +22,11 @@
 #include <linux/device.h>
 #include <linux/suspend.h>
 #include <linux/acpi.h>
+
 #include <asm/uaccess.h>
 #include <asm/acpi.h>
 #include <asm/tlbflush.h>
+#include <asm/processor.h>
 
 static struct saved_context saved_context;
 
@@ -33,8 +35,6 @@ unsigned long saved_context_esp, saved_context_ebp;
 unsigned long saved_context_esi, saved_context_edi;
 unsigned long saved_context_eflags;
 
-extern void enable_sep_cpu(void *);
-
 void __save_processor_state(struct saved_context *ctxt)
 {
 	kernel_fpu_begin();
@@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt)
 	 */
 	asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
 	asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
-	asm volatile ("sldt %0" : "=m" (ctxt->ldt));
 	asm volatile ("str %0"  : "=m" (ctxt->tr));
 
 	/*
@@ -94,20 +93,19 @@ static void fix_processor_context(void)
 	 * Now maybe reload the debug registers
 	 */
 	if (current->thread.debugreg[7]){
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
+		set_debugreg(current->thread.debugreg[0], 0);
+		set_debugreg(current->thread.debugreg[1], 1);
+		set_debugreg(current->thread.debugreg[2], 2);
+		set_debugreg(current->thread.debugreg[3], 3);
 		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
+		set_debugreg(current->thread.debugreg[6], 6);
+		set_debugreg(current->thread.debugreg[7], 7);
 	}
 
 }
 
 void __restore_processor_state(struct saved_context *ctxt)
 {
-
 	/*
 	 * control registers
 	 */
@@ -117,6 +115,13 @@ void __restore_processor_state(struct saved_context *ctxt)
 	asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
 
 	/*
+	 * now restore the descriptor tables to their proper values
+	 * ltr is done i fix_processor_context().
+	 */
+	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
+	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+
+	/*
 	 * segment registers
 	 */
 	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
@@ -125,18 +130,10 @@ void __restore_processor_state(struct saved_context *ctxt)
 	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
 	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
-	 */
-	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
-	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
-	asm volatile ("lldt %0" :: "m" (ctxt->ldt));
-
-	/*
 	 * sysenter MSRs
 	 */
 	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu(NULL);
+		enable_sep_cpu();
 
 	fix_processor_context();
 	do_fpu_end();