aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig84
-rw-r--r--arch/x86_64/Makefile45
-rw-r--r--arch/x86_64/boot/Makefile36
-rw-r--r--arch/x86_64/boot/video.S5
-rw-r--r--arch/x86_64/crypto/aes.c7
-rw-r--r--arch/x86_64/defconfig117
-rw-r--r--arch/x86_64/ia32/Makefile4
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c6
-rw-r--r--arch/x86_64/ia32/ia32entry.S31
-rw-r--r--arch/x86_64/ia32/sys_ia32.c115
-rw-r--r--arch/x86_64/ia32/vsyscall-sigreturn.S23
-rw-r--r--arch/x86_64/kernel/Makefile6
-rw-r--r--arch/x86_64/kernel/aperture.c8
-rw-r--r--arch/x86_64/kernel/apic.c37
-rw-r--r--arch/x86_64/kernel/e820.c44
-rw-r--r--arch/x86_64/kernel/early_printk.c118
-rw-r--r--arch/x86_64/kernel/entry.S38
-rw-r--r--arch/x86_64/kernel/functionlist1286
-rw-r--r--arch/x86_64/kernel/head.S33
-rw-r--r--arch/x86_64/kernel/io_apic.c85
-rw-r--r--arch/x86_64/kernel/irq.c21
-rw-r--r--arch/x86_64/kernel/kprobes.c85
-rw-r--r--arch/x86_64/kernel/machine_kexec.c2
-rw-r--r--arch/x86_64/kernel/mce.c17
-rw-r--r--arch/x86_64/kernel/mce_amd.c2
-rw-r--r--arch/x86_64/kernel/mpparse.c35
-rw-r--r--arch/x86_64/kernel/nmi.c31
-rw-r--r--arch/x86_64/kernel/pci-dma.c9
-rw-r--r--arch/x86_64/kernel/pci-gart.c49
-rw-r--r--arch/x86_64/kernel/pci-nommu.c7
-rw-r--r--arch/x86_64/kernel/pmtimer.c7
-rw-r--r--arch/x86_64/kernel/process.c49
-rw-r--r--arch/x86_64/kernel/ptrace.c17
-rw-r--r--arch/x86_64/kernel/setup.c149
-rw-r--r--arch/x86_64/kernel/setup64.c22
-rw-r--r--arch/x86_64/kernel/signal.c4
-rw-r--r--arch/x86_64/kernel/smp.c6
-rw-r--r--arch/x86_64/kernel/smpboot.c30
-rw-r--r--arch/x86_64/kernel/time.c203
-rw-r--r--arch/x86_64/kernel/traps.c97
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c8
-rw-r--r--arch/x86_64/lib/thunk.S1
-rw-r--r--arch/x86_64/mm/fault.c81
-rw-r--r--arch/x86_64/mm/init.c81
-rw-r--r--arch/x86_64/mm/k8topology.c4
-rw-r--r--arch/x86_64/mm/numa.c89
-rw-r--r--arch/x86_64/mm/pageattr.c63
-rw-r--r--arch/x86_64/mm/srat.c194
-rw-r--r--arch/x86_64/pci/Makefile3
-rw-r--r--arch/x86_64/pci/mmconfig.c67
51 files changed, 2739 insertions, 830 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 2f9deca31cc9..6527a368fb9c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -45,6 +45,10 @@ config RWSEM_GENERIC_SPINLOCK
45config RWSEM_XCHGADD_ALGORITHM 45config RWSEM_XCHGADD_ALGORITHM
46 bool 46 bool
47 47
48config GENERIC_HWEIGHT
49 bool
50 default y
51
48config GENERIC_CALIBRATE_DELAY 52config GENERIC_CALIBRATE_DELAY
49 bool 53 bool
50 default y 54 default y
@@ -132,6 +136,11 @@ config X86_L1_CACHE_SHIFT
132 default "7" if GENERIC_CPU || MPSC 136 default "7" if GENERIC_CPU || MPSC
133 default "6" if MK8 137 default "6" if MK8
134 138
139config X86_INTERNODE_CACHE_BYTES
140 int
141 default "4096" if X86_VSMP
142 default X86_L1_CACHE_BYTES if !X86_VSMP
143
135config X86_TSC 144config X86_TSC
136 bool 145 bool
137 default y 146 default y
@@ -246,6 +255,15 @@ config SCHED_SMT
246 cost of slightly increased overhead in some places. If unsure say 255 cost of slightly increased overhead in some places. If unsure say
247 N here. 256 N here.
248 257
258config SCHED_MC
259 bool "Multi-core scheduler support"
260 depends on SMP
261 default y
262 help
263 Multi-core scheduler support improves the CPU scheduler's decision
264 making when dealing with multi-core CPU chips at a cost of slightly
265 increased overhead in some places. If unsure say N here.
266
249source "kernel/Kconfig.preempt" 267source "kernel/Kconfig.preempt"
250 268
251config NUMA 269config NUMA
@@ -270,12 +288,18 @@ config K8_NUMA
270 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA 288 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
271 instead, which also takes priority if both are compiled in. 289 instead, which also takes priority if both are compiled in.
272 290
291config NODES_SHIFT
292 int
293 default "6"
294 depends on NEED_MULTIPLE_NODES
295
273# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. 296# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
274 297
275config X86_64_ACPI_NUMA 298config X86_64_ACPI_NUMA
276 bool "ACPI NUMA detection" 299 bool "ACPI NUMA detection"
277 depends on NUMA 300 depends on NUMA
278 select ACPI 301 select ACPI
302 select PCI
279 select ACPI_NUMA 303 select ACPI_NUMA
280 default y 304 default y
281 help 305 help
@@ -321,9 +345,13 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
321 def_bool y 345 def_bool y
322 depends on NUMA 346 depends on NUMA
323 347
348config OUT_OF_LINE_PFN_TO_PAGE
349 def_bool y
350 depends on DISCONTIGMEM
351
324config NR_CPUS 352config NR_CPUS
325 int "Maximum number of CPUs (2-256)" 353 int "Maximum number of CPUs (2-256)"
326 range 2 256 354 range 2 255
327 depends on SMP 355 depends on SMP
328 default "8" 356 default "8"
329 help 357 help
@@ -354,21 +382,6 @@ config HPET_TIMER
354 as it is off-chip. You can find the HPET spec at 382 as it is off-chip. You can find the HPET spec at
355 <http://www.intel.com/hardwaredesign/hpetspec.htm>. 383 <http://www.intel.com/hardwaredesign/hpetspec.htm>.
356 384
357config X86_PM_TIMER
358 bool "PM timer" if EMBEDDED
359 depends on ACPI
360 default y
361 help
362 Support the ACPI PM timer for time keeping. This is slow,
363 but is useful on some chipsets without HPET on systems with more
364 than one CPU. On a single processor or single socket multi core
365 system it is normally not required.
366 When the PM timer is active 64bit vsyscalls are disabled
367 and should not be enabled (/proc/sys/kernel/vsyscall64 should
368 not be changed).
369 The kernel selects the PM timer only as a last resort, so it is
370 useful to enable just in case.
371
372config HPET_EMULATE_RTC 385config HPET_EMULATE_RTC
373 bool "Provide RTC interrupt" 386 bool "Provide RTC interrupt"
374 depends on HPET_TIMER && RTC=y 387 depends on HPET_TIMER && RTC=y
@@ -379,13 +392,15 @@ config GART_IOMMU
379 select SWIOTLB 392 select SWIOTLB
380 depends on PCI 393 depends on PCI
381 help 394 help
382 Support the IOMMU. Needed to run systems with more than 3GB of memory 395 Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
383 properly with 32-bit PCI devices that do not support DAC (Double Address 396 and for the bounce buffering software IOMMU.
384 Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. 397 Needed to run systems with more than 3GB of memory properly with
385 Normally the kernel will take the right choice by itself. 398 32-bit PCI devices that do not support DAC (Double Address Cycle).
386 This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU 399 The IOMMU can be turned off at runtime with the iommu=off parameter.
387 and a software emulation used on other systems. 400 Normally the kernel will take the right choice by itself.
388 If unsure, say Y. 401 This option includes a driver for the AMD Opteron/Athlon64 IOMMU
402 northbridge and a software emulation used on other systems without
403 hardware IOMMU. If unsure, say Y.
389 404
390# need this always enabled with GART_IOMMU for the VIA workaround 405# need this always enabled with GART_IOMMU for the VIA workaround
391config SWIOTLB 406config SWIOTLB
@@ -444,10 +459,10 @@ config CRASH_DUMP
444config PHYSICAL_START 459config PHYSICAL_START
445 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 460 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
446 default "0x1000000" if CRASH_DUMP 461 default "0x1000000" if CRASH_DUMP
447 default "0x100000" 462 default "0x200000"
448 help 463 help
449 This gives the physical address where the kernel is loaded. Normally 464 This gives the physical address where the kernel is loaded. Normally
450 for regular kernels this value is 0x100000 (1MB). But in the case 465 for regular kernels this value is 0x200000 (2MB). But in the case
451 of kexec on panic the fail safe kernel needs to run at a different 466 of kexec on panic the fail safe kernel needs to run at a different
452 address than the panic-ed kernel. This option is used to set the load 467 address than the panic-ed kernel. This option is used to set the load
453 address for kernels used to capture crash dump on being kexec'ed 468 address for kernels used to capture crash dump on being kexec'ed
@@ -479,6 +494,14 @@ config SECCOMP
479 494
480source kernel/Kconfig.hz 495source kernel/Kconfig.hz
481 496
497config REORDER
498 bool "Function reordering"
499 default n
500 help
501 This option enables the toolchain to reorder functions for a more
502 optimal TLB usage. If you have pretty much any version of binutils,
503 this can increase your kernel build time by roughly one minute.
504
482endmenu 505endmenu
483 506
484# 507#
@@ -527,16 +550,6 @@ config PCI_MMCONFIG
527 bool "Support mmconfig PCI config space access" 550 bool "Support mmconfig PCI config space access"
528 depends on PCI && ACPI 551 depends on PCI && ACPI
529 552
530config UNORDERED_IO
531 bool "Unordered IO mapping access"
532 depends on EXPERIMENTAL
533 help
534 Use unordered stores to access IO memory mappings in device drivers.
535 Still very experimental. When a driver works on IA64/ppc64/pa-risc it should
536 work with this option, but it makes the drivers behave differently
537 from i386. Requires that the driver writer used memory barriers
538 properly.
539
540source "drivers/pci/pcie/Kconfig" 553source "drivers/pci/pcie/Kconfig"
541 554
542source "drivers/pci/Kconfig" 555source "drivers/pci/Kconfig"
@@ -592,6 +605,7 @@ source "arch/x86_64/oprofile/Kconfig"
592 605
593config KPROBES 606config KPROBES
594 bool "Kprobes (EXPERIMENTAL)" 607 bool "Kprobes (EXPERIMENTAL)"
608 depends on EXPERIMENTAL && MODULES
595 help 609 help
596 Kprobes allows you to trap at almost any kernel address and 610 Kprobes allows you to trap at almost any kernel address and
597 execute a callback function. register_kprobe() establishes 611 execute a callback function. register_kprobe() establishes
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index d7fd46479c55..e573e2ab5510 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -24,35 +24,37 @@
24LDFLAGS := -m elf_x86_64 24LDFLAGS := -m elf_x86_64
25OBJCOPYFLAGS := -O binary -R .note -R .comment -S 25OBJCOPYFLAGS := -O binary -R .note -R .comment -S
26LDFLAGS_vmlinux := 26LDFLAGS_vmlinux :=
27
28CHECKFLAGS += -D__x86_64__ -m64 27CHECKFLAGS += -D__x86_64__ -m64
29 28
29cflags-y :=
30cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) 30cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
31cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) 31cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
32CFLAGS += $(cflags-y) 32cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
33 33
34CFLAGS += -m64 34cflags-y += -m64
35CFLAGS += -mno-red-zone 35cflags-y += -mno-red-zone
36CFLAGS += -mcmodel=kernel 36cflags-y += -mcmodel=kernel
37CFLAGS += -pipe 37cflags-y += -pipe
38cflags-$(CONFIG_REORDER) += -ffunction-sections
38# this makes reading assembly source easier, but produces worse code 39# this makes reading assembly source easier, but produces worse code
39# actually it makes the kernel smaller too. 40# actually it makes the kernel smaller too.
40CFLAGS += -fno-reorder-blocks 41cflags-y += -fno-reorder-blocks
41CFLAGS += -Wno-sign-compare 42cflags-y += -Wno-sign-compare
42ifneq ($(CONFIG_UNWIND_INFO),y) 43ifneq ($(CONFIG_UNWIND_INFO),y)
43CFLAGS += -fno-asynchronous-unwind-tables 44cflags-y += -fno-asynchronous-unwind-tables
44endif 45endif
45ifneq ($(CONFIG_DEBUG_INFO),y) 46ifneq ($(CONFIG_DEBUG_INFO),y)
46# -fweb shrinks the kernel a bit, but the difference is very small 47# -fweb shrinks the kernel a bit, but the difference is very small
47# it also messes up debugging, so don't use it for now. 48# it also messes up debugging, so don't use it for now.
48#CFLAGS += $(call cc-option,-fweb) 49#cflags-y += $(call cc-option,-fweb)
49endif 50endif
50# -funit-at-a-time shrinks the kernel .text considerably 51# -funit-at-a-time shrinks the kernel .text considerably
51# unfortunately it makes reading oopses harder. 52# unfortunately it makes reading oopses harder.
52CFLAGS += $(call cc-option,-funit-at-a-time) 53cflags-y += $(call cc-option,-funit-at-a-time)
53# prevent gcc from generating any FP code by mistake 54# prevent gcc from generating any FP code by mistake
54CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 55cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
55 56
57CFLAGS += $(cflags-y)
56AFLAGS += -m64 58AFLAGS += -m64
57 59
58head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o 60head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
@@ -67,8 +69,8 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
67 69
68boot := arch/x86_64/boot 70boot := arch/x86_64/boot
69 71
70.PHONY: bzImage bzlilo install archmrproper \ 72PHONY += bzImage bzlilo install archmrproper \
71 fdimage fdimage144 fdimage288 archclean 73 fdimage fdimage144 fdimage288 isoimage archclean
72 74
73#Default target when executing "make" 75#Default target when executing "make"
74all: bzImage 76all: bzImage
@@ -85,7 +87,7 @@ bzlilo: vmlinux
85bzdisk: vmlinux 87bzdisk: vmlinux
86 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk 88 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
87 89
88fdimage fdimage144 fdimage288: vmlinux 90fdimage fdimage144 fdimage288 isoimage: vmlinux
89 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ 91 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
90 92
91install: 93install:
@@ -97,11 +99,16 @@ archclean:
97define archhelp 99define archhelp
98 echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' 100 echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
99 echo ' install - Install kernel using' 101 echo ' install - Install kernel using'
100 echo ' (your) ~/bin/installkernel or' 102 echo ' (your) ~/bin/installkernel or'
101 echo ' (distribution) /sbin/installkernel or' 103 echo ' (distribution) /sbin/installkernel or'
102 echo ' install to $$(INSTALL_PATH) and run lilo' 104 echo ' install to $$(INSTALL_PATH) and run lilo'
105 echo ' bzdisk - Create a boot floppy in /dev/fd0'
106 echo ' fdimage - Create a boot floppy image'
107 echo ' isoimage - Create a boot CD-ROM image'
103endef 108endef
104 109
105CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf 110CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
111 arch/$(ARCH)/boot/image.iso \
112 arch/$(ARCH)/boot/mtools.conf
106 113
107 114
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index 29f8396ed151..43ee6c50c277 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -60,8 +60,12 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE
60$(obj)/compressed/vmlinux: FORCE 60$(obj)/compressed/vmlinux: FORCE
61 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ 61 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
62 62
63# Set this if you want to pass append arguments to the zdisk/fdimage kernel 63# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
64FDARGS = 64FDARGS =
65# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
66FDINITRD =
67
68image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
65 69
66$(obj)/mtools.conf: $(src)/mtools.conf.in 70$(obj)/mtools.conf: $(src)/mtools.conf.in
67 sed -e 's|@OBJ@|$(obj)|g' < $< > $@ 71 sed -e 's|@OBJ@|$(obj)|g' < $< > $@
@@ -70,8 +74,11 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in
70zdisk: $(BOOTIMAGE) $(obj)/mtools.conf 74zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
71 MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync 75 MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
72 syslinux /dev/fd0 ; sync 76 syslinux /dev/fd0 ; sync
73 echo 'default linux $(FDARGS)' | \ 77 echo '$(image_cmdline)' | \
74 MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg 78 MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg
79 if [ -f '$(FDINITRD)' ] ; then \
80 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
81 fi
75 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync 82 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
76 83
77# These require being root or having syslinux 2.02 or higher installed 84# These require being root or having syslinux 2.02 or higher installed
@@ -79,18 +86,39 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
79 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 86 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
80 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync 87 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
81 syslinux $(obj)/fdimage ; sync 88 syslinux $(obj)/fdimage ; sync
82 echo 'default linux $(FDARGS)' | \ 89 echo '$(image_cmdline)' | \
83 MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg 90 MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
91 if [ -f '$(FDINITRD)' ] ; then \
92 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
93 fi
84 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync 94 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
85 95
86fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf 96fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
87 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 97 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
88 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync 98 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
89 syslinux $(obj)/fdimage ; sync 99 syslinux $(obj)/fdimage ; sync
90 echo 'default linux $(FDARGS)' | \ 100 echo '$(image_cmdline)' | \
91 MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg 101 MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
102 if [ -f '$(FDINITRD)' ] ; then \
103 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
104 fi
92 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync 105 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
93 106
107isoimage: $(BOOTIMAGE)
108 -rm -rf $(obj)/isoimage
109 mkdir $(obj)/isoimage
110 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
111 $(obj)/isoimage
112 cp $(BOOTIMAGE) $(obj)/isoimage/linux
113 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
114 if [ -f '$(FDINITRD)' ] ; then \
115 cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
116 fi
117 mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
118 -no-emul-boot -boot-load-size 4 -boot-info-table \
119 $(obj)/isoimage
120 rm -rf $(obj)/isoimage
121
94zlilo: $(BOOTIMAGE) 122zlilo: $(BOOTIMAGE)
95 if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi 123 if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
96 if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi 124 if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
index 0587477c99f2..32327bb37aff 100644
--- a/arch/x86_64/boot/video.S
+++ b/arch/x86_64/boot/video.S
@@ -97,6 +97,7 @@
97#define PARAM_VESAPM_OFF 0x30 97#define PARAM_VESAPM_OFF 0x30
98#define PARAM_LFB_PAGES 0x32 98#define PARAM_LFB_PAGES 0x32
99#define PARAM_VESA_ATTRIB 0x34 99#define PARAM_VESA_ATTRIB 0x34
100#define PARAM_CAPABILITIES 0x36
100 101
101/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ 102/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
102#ifdef CONFIG_VIDEO_RETAIN 103#ifdef CONFIG_VIDEO_RETAIN
@@ -233,6 +234,10 @@ mopar_gr:
233 movw 18(%di), %ax 234 movw 18(%di), %ax
234 movl %eax, %fs:(PARAM_LFB_SIZE) 235 movl %eax, %fs:(PARAM_LFB_SIZE)
235 236
237# store mode capabilities
238 movl 10(%di), %eax
239 movl %eax, %fs:(PARAM_CAPABILITIES)
240
236# switching the DAC to 8-bit is for <= 8 bpp only 241# switching the DAC to 8-bit is for <= 8 bpp only
237 movw %fs:(PARAM_LFB_DEPTH), %ax 242 movw %fs:(PARAM_LFB_DEPTH), %ax
238 cmpw $8, %ax 243 cmpw $8, %ax
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
index fb1b961a2e2f..6f77e7700d32 100644
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -77,12 +77,11 @@ static inline u8 byte(const u32 x, const unsigned n)
77struct aes_ctx 77struct aes_ctx
78{ 78{
79 u32 key_length; 79 u32 key_length;
80 u32 E[60]; 80 u32 buf[120];
81 u32 D[60];
82}; 81};
83 82
84#define E_KEY ctx->E 83#define E_KEY (&ctx->buf[0])
85#define D_KEY ctx->D 84#define D_KEY (&ctx->buf[60])
86 85
87static u8 pow_tab[256] __initdata; 86static u8 pow_tab[256] __initdata;
88static u8 log_tab[256] __initdata; 87static u8 log_tab[256] __initdata;
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 56832929a543..69db0c0721d1 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.16-rc1-git2 3# Linux kernel version: 2.6.17-rc1-git11
4# Thu Jan 19 10:05:21 2006 4# Sun Apr 16 07:22:36 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -9,6 +9,7 @@ CONFIG_X86=y
9CONFIG_SEMAPHORE_SLEEPERS=y 9CONFIG_SEMAPHORE_SLEEPERS=y
10CONFIG_MMU=y 10CONFIG_MMU=y
11CONFIG_RWSEM_GENERIC_SPINLOCK=y 11CONFIG_RWSEM_GENERIC_SPINLOCK=y
12CONFIG_GENERIC_HWEIGHT=y
12CONFIG_GENERIC_CALIBRATE_DELAY=y 13CONFIG_GENERIC_CALIBRATE_DELAY=y
13CONFIG_X86_CMPXCHG=y 14CONFIG_X86_CMPXCHG=y
14CONFIG_EARLY_PRINTK=y 15CONFIG_EARLY_PRINTK=y
@@ -21,7 +22,6 @@ CONFIG_DMI=y
21# Code maturity level options 22# Code maturity level options
22# 23#
23CONFIG_EXPERIMENTAL=y 24CONFIG_EXPERIMENTAL=y
24CONFIG_CLEAN_COMPILE=y
25CONFIG_LOCK_KERNEL=y 25CONFIG_LOCK_KERNEL=y
26CONFIG_INIT_ENV_ARG_LIMIT=32 26CONFIG_INIT_ENV_ARG_LIMIT=32
27 27
@@ -39,6 +39,7 @@ CONFIG_SYSCTL=y
39CONFIG_IKCONFIG=y 39CONFIG_IKCONFIG=y
40CONFIG_IKCONFIG_PROC=y 40CONFIG_IKCONFIG_PROC=y
41# CONFIG_CPUSETS is not set 41# CONFIG_CPUSETS is not set
42# CONFIG_RELAY is not set
42CONFIG_INITRAMFS_SOURCE="" 43CONFIG_INITRAMFS_SOURCE=""
43CONFIG_UID16=y 44CONFIG_UID16=y
44CONFIG_VM86=y 45CONFIG_VM86=y
@@ -55,11 +56,8 @@ CONFIG_BASE_FULL=y
55CONFIG_FUTEX=y 56CONFIG_FUTEX=y
56CONFIG_EPOLL=y 57CONFIG_EPOLL=y
57CONFIG_SHMEM=y 58CONFIG_SHMEM=y
58CONFIG_CC_ALIGN_FUNCTIONS=0
59CONFIG_CC_ALIGN_LABELS=0
60CONFIG_CC_ALIGN_LOOPS=0
61CONFIG_CC_ALIGN_JUMPS=0
62CONFIG_SLAB=y 59CONFIG_SLAB=y
60CONFIG_DOUBLEFAULT=y
63# CONFIG_TINY_SHMEM is not set 61# CONFIG_TINY_SHMEM is not set
64CONFIG_BASE_SMALL=0 62CONFIG_BASE_SMALL=0
65# CONFIG_SLOB is not set 63# CONFIG_SLOB is not set
@@ -70,7 +68,6 @@ CONFIG_BASE_SMALL=0
70CONFIG_MODULES=y 68CONFIG_MODULES=y
71CONFIG_MODULE_UNLOAD=y 69CONFIG_MODULE_UNLOAD=y
72CONFIG_MODULE_FORCE_UNLOAD=y 70CONFIG_MODULE_FORCE_UNLOAD=y
73CONFIG_OBSOLETE_MODPARM=y
74# CONFIG_MODVERSIONS is not set 71# CONFIG_MODVERSIONS is not set
75# CONFIG_MODULE_SRCVERSION_ALL is not set 72# CONFIG_MODULE_SRCVERSION_ALL is not set
76# CONFIG_KMOD is not set 73# CONFIG_KMOD is not set
@@ -80,6 +77,8 @@ CONFIG_STOP_MACHINE=y
80# Block layer 77# Block layer
81# 78#
82CONFIG_LBD=y 79CONFIG_LBD=y
80# CONFIG_BLK_DEV_IO_TRACE is not set
81# CONFIG_LSF is not set
83 82
84# 83#
85# IO Schedulers 84# IO Schedulers
@@ -104,6 +103,7 @@ CONFIG_X86_PC=y
104CONFIG_GENERIC_CPU=y 103CONFIG_GENERIC_CPU=y
105CONFIG_X86_L1_CACHE_BYTES=128 104CONFIG_X86_L1_CACHE_BYTES=128
106CONFIG_X86_L1_CACHE_SHIFT=7 105CONFIG_X86_L1_CACHE_SHIFT=7
106CONFIG_X86_INTERNODE_CACHE_BYTES=128
107CONFIG_X86_TSC=y 107CONFIG_X86_TSC=y
108CONFIG_X86_GOOD_APIC=y 108CONFIG_X86_GOOD_APIC=y
109# CONFIG_MICROCODE is not set 109# CONFIG_MICROCODE is not set
@@ -115,12 +115,14 @@ CONFIG_X86_LOCAL_APIC=y
115CONFIG_MTRR=y 115CONFIG_MTRR=y
116CONFIG_SMP=y 116CONFIG_SMP=y
117CONFIG_SCHED_SMT=y 117CONFIG_SCHED_SMT=y
118CONFIG_SCHED_MC=y
118# CONFIG_PREEMPT_NONE is not set 119# CONFIG_PREEMPT_NONE is not set
119CONFIG_PREEMPT_VOLUNTARY=y 120CONFIG_PREEMPT_VOLUNTARY=y
120# CONFIG_PREEMPT is not set 121# CONFIG_PREEMPT is not set
121CONFIG_PREEMPT_BKL=y 122CONFIG_PREEMPT_BKL=y
122CONFIG_NUMA=y 123CONFIG_NUMA=y
123CONFIG_K8_NUMA=y 124CONFIG_K8_NUMA=y
125CONFIG_NODES_SHIFT=6
124CONFIG_X86_64_ACPI_NUMA=y 126CONFIG_X86_64_ACPI_NUMA=y
125CONFIG_NUMA_EMU=y 127CONFIG_NUMA_EMU=y
126CONFIG_ARCH_DISCONTIGMEM_ENABLE=y 128CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
@@ -137,10 +139,10 @@ CONFIG_NEED_MULTIPLE_NODES=y
137CONFIG_SPLIT_PTLOCK_CPUS=4 139CONFIG_SPLIT_PTLOCK_CPUS=4
138CONFIG_MIGRATION=y 140CONFIG_MIGRATION=y
139CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y 141CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
142CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y
140CONFIG_NR_CPUS=32 143CONFIG_NR_CPUS=32
141CONFIG_HOTPLUG_CPU=y 144CONFIG_HOTPLUG_CPU=y
142CONFIG_HPET_TIMER=y 145CONFIG_HPET_TIMER=y
143CONFIG_X86_PM_TIMER=y
144CONFIG_HPET_EMULATE_RTC=y 146CONFIG_HPET_EMULATE_RTC=y
145CONFIG_GART_IOMMU=y 147CONFIG_GART_IOMMU=y
146CONFIG_SWIOTLB=y 148CONFIG_SWIOTLB=y
@@ -149,12 +151,13 @@ CONFIG_X86_MCE_INTEL=y
149CONFIG_X86_MCE_AMD=y 151CONFIG_X86_MCE_AMD=y
150# CONFIG_KEXEC is not set 152# CONFIG_KEXEC is not set
151# CONFIG_CRASH_DUMP is not set 153# CONFIG_CRASH_DUMP is not set
152CONFIG_PHYSICAL_START=0x100000 154CONFIG_PHYSICAL_START=0x200000
153CONFIG_SECCOMP=y 155CONFIG_SECCOMP=y
154# CONFIG_HZ_100 is not set 156# CONFIG_HZ_100 is not set
155CONFIG_HZ_250=y 157CONFIG_HZ_250=y
156# CONFIG_HZ_1000 is not set 158# CONFIG_HZ_1000 is not set
157CONFIG_HZ=250 159CONFIG_HZ=250
160# CONFIG_REORDER is not set
158CONFIG_GENERIC_HARDIRQS=y 161CONFIG_GENERIC_HARDIRQS=y
159CONFIG_GENERIC_IRQ_PROBE=y 162CONFIG_GENERIC_IRQ_PROBE=y
160CONFIG_ISA_DMA_API=y 163CONFIG_ISA_DMA_API=y
@@ -190,12 +193,14 @@ CONFIG_ACPI_NUMA=y
190# CONFIG_ACPI_ASUS is not set 193# CONFIG_ACPI_ASUS is not set
191# CONFIG_ACPI_IBM is not set 194# CONFIG_ACPI_IBM is not set
192CONFIG_ACPI_TOSHIBA=y 195CONFIG_ACPI_TOSHIBA=y
193CONFIG_ACPI_BLACKLIST_YEAR=2001 196CONFIG_ACPI_BLACKLIST_YEAR=0
194# CONFIG_ACPI_DEBUG is not set 197# CONFIG_ACPI_DEBUG is not set
195CONFIG_ACPI_EC=y 198CONFIG_ACPI_EC=y
196CONFIG_ACPI_POWER=y 199CONFIG_ACPI_POWER=y
197CONFIG_ACPI_SYSTEM=y 200CONFIG_ACPI_SYSTEM=y
201CONFIG_X86_PM_TIMER=y
198CONFIG_ACPI_CONTAINER=y 202CONFIG_ACPI_CONTAINER=y
203CONFIG_ACPI_HOTPLUG_MEMORY=y
199 204
200# 205#
201# CPU Frequency scaling 206# CPU Frequency scaling
@@ -233,10 +238,8 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
233CONFIG_PCI=y 238CONFIG_PCI=y
234CONFIG_PCI_DIRECT=y 239CONFIG_PCI_DIRECT=y
235CONFIG_PCI_MMCONFIG=y 240CONFIG_PCI_MMCONFIG=y
236CONFIG_UNORDERED_IO=y
237CONFIG_PCIEPORTBUS=y 241CONFIG_PCIEPORTBUS=y
238CONFIG_PCI_MSI=y 242CONFIG_PCI_MSI=y
239# CONFIG_PCI_LEGACY_PROC is not set
240# CONFIG_PCI_DEBUG is not set 243# CONFIG_PCI_DEBUG is not set
241 244
242# 245#
@@ -267,6 +270,7 @@ CONFIG_NET=y
267# 270#
268# Networking options 271# Networking options
269# 272#
273# CONFIG_NETDEBUG is not set
270CONFIG_PACKET=y 274CONFIG_PACKET=y
271# CONFIG_PACKET_MMAP is not set 275# CONFIG_PACKET_MMAP is not set
272CONFIG_UNIX=y 276CONFIG_UNIX=y
@@ -287,6 +291,7 @@ CONFIG_IP_PNP_DHCP=y
287# CONFIG_INET_AH is not set 291# CONFIG_INET_AH is not set
288# CONFIG_INET_ESP is not set 292# CONFIG_INET_ESP is not set
289# CONFIG_INET_IPCOMP is not set 293# CONFIG_INET_IPCOMP is not set
294# CONFIG_INET_XFRM_TUNNEL is not set
290# CONFIG_INET_TUNNEL is not set 295# CONFIG_INET_TUNNEL is not set
291CONFIG_INET_DIAG=y 296CONFIG_INET_DIAG=y
292CONFIG_INET_TCP_DIAG=y 297CONFIG_INET_TCP_DIAG=y
@@ -294,9 +299,11 @@ CONFIG_INET_TCP_DIAG=y
294CONFIG_TCP_CONG_BIC=y 299CONFIG_TCP_CONG_BIC=y
295CONFIG_IPV6=y 300CONFIG_IPV6=y
296# CONFIG_IPV6_PRIVACY is not set 301# CONFIG_IPV6_PRIVACY is not set
302# CONFIG_IPV6_ROUTER_PREF is not set
297# CONFIG_INET6_AH is not set 303# CONFIG_INET6_AH is not set
298# CONFIG_INET6_ESP is not set 304# CONFIG_INET6_ESP is not set
299# CONFIG_INET6_IPCOMP is not set 305# CONFIG_INET6_IPCOMP is not set
306# CONFIG_INET6_XFRM_TUNNEL is not set
300# CONFIG_INET6_TUNNEL is not set 307# CONFIG_INET6_TUNNEL is not set
301# CONFIG_IPV6_TUNNEL is not set 308# CONFIG_IPV6_TUNNEL is not set
302# CONFIG_NETFILTER is not set 309# CONFIG_NETFILTER is not set
@@ -446,7 +453,6 @@ CONFIG_BLK_DEV_PIIX=y
446# CONFIG_BLK_DEV_NS87415 is not set 453# CONFIG_BLK_DEV_NS87415 is not set
447# CONFIG_BLK_DEV_PDC202XX_OLD is not set 454# CONFIG_BLK_DEV_PDC202XX_OLD is not set
448CONFIG_BLK_DEV_PDC202XX_NEW=y 455CONFIG_BLK_DEV_PDC202XX_NEW=y
449# CONFIG_PDC202XX_FORCE is not set
450# CONFIG_BLK_DEV_SVWKS is not set 456# CONFIG_BLK_DEV_SVWKS is not set
451# CONFIG_BLK_DEV_SIIMAGE is not set 457# CONFIG_BLK_DEV_SIIMAGE is not set
452# CONFIG_BLK_DEV_SIS5513 is not set 458# CONFIG_BLK_DEV_SIS5513 is not set
@@ -540,7 +546,6 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
540# CONFIG_SCSI_INIA100 is not set 546# CONFIG_SCSI_INIA100 is not set
541# CONFIG_SCSI_SYM53C8XX_2 is not set 547# CONFIG_SCSI_SYM53C8XX_2 is not set
542# CONFIG_SCSI_IPR is not set 548# CONFIG_SCSI_IPR is not set
543# CONFIG_SCSI_QLOGIC_FC is not set
544# CONFIG_SCSI_QLOGIC_1280 is not set 549# CONFIG_SCSI_QLOGIC_1280 is not set
545# CONFIG_SCSI_QLA_FC is not set 550# CONFIG_SCSI_QLA_FC is not set
546# CONFIG_SCSI_LPFC is not set 551# CONFIG_SCSI_LPFC is not set
@@ -573,7 +578,33 @@ CONFIG_FUSION_MAX_SGE=128
573# 578#
574# IEEE 1394 (FireWire) support 579# IEEE 1394 (FireWire) support
575# 580#
576# CONFIG_IEEE1394 is not set 581CONFIG_IEEE1394=y
582
583#
584# Subsystem Options
585#
586# CONFIG_IEEE1394_VERBOSEDEBUG is not set
587# CONFIG_IEEE1394_OUI_DB is not set
588# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
589# CONFIG_IEEE1394_EXPORT_FULL_API is not set
590
591#
592# Device Drivers
593#
594
595#
596# Texas Instruments PCILynx requires I2C
597#
598CONFIG_IEEE1394_OHCI1394=y
599
600#
601# Protocol Drivers
602#
603# CONFIG_IEEE1394_VIDEO1394 is not set
604# CONFIG_IEEE1394_SBP2 is not set
605# CONFIG_IEEE1394_ETH1394 is not set
606# CONFIG_IEEE1394_DV1394 is not set
607CONFIG_IEEE1394_RAWIO=y
577 608
578# 609#
579# I2O device support 610# I2O device support
@@ -762,7 +793,7 @@ CONFIG_HW_CONSOLE=y
762# 793#
763CONFIG_SERIAL_8250=y 794CONFIG_SERIAL_8250=y
764CONFIG_SERIAL_8250_CONSOLE=y 795CONFIG_SERIAL_8250_CONSOLE=y
765# CONFIG_SERIAL_8250_ACPI is not set 796CONFIG_SERIAL_8250_PCI=y
766CONFIG_SERIAL_8250_NR_UARTS=4 797CONFIG_SERIAL_8250_NR_UARTS=4
767CONFIG_SERIAL_8250_RUNTIME_UARTS=4 798CONFIG_SERIAL_8250_RUNTIME_UARTS=4
768# CONFIG_SERIAL_8250_EXTENDED is not set 799# CONFIG_SERIAL_8250_EXTENDED is not set
@@ -772,6 +803,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
772# 803#
773CONFIG_SERIAL_CORE=y 804CONFIG_SERIAL_CORE=y
774CONFIG_SERIAL_CORE_CONSOLE=y 805CONFIG_SERIAL_CORE_CONSOLE=y
806# CONFIG_SERIAL_JSM is not set
775CONFIG_UNIX98_PTYS=y 807CONFIG_UNIX98_PTYS=y
776CONFIG_LEGACY_PTYS=y 808CONFIG_LEGACY_PTYS=y
777CONFIG_LEGACY_PTY_COUNT=256 809CONFIG_LEGACY_PTY_COUNT=256
@@ -835,6 +867,8 @@ CONFIG_RTC=y
835CONFIG_AGP=y 867CONFIG_AGP=y
836CONFIG_AGP_AMD64=y 868CONFIG_AGP_AMD64=y
837CONFIG_AGP_INTEL=y 869CONFIG_AGP_INTEL=y
870# CONFIG_AGP_SIS is not set
871# CONFIG_AGP_VIA is not set
838# CONFIG_DRM is not set 872# CONFIG_DRM is not set
839# CONFIG_MWAVE is not set 873# CONFIG_MWAVE is not set
840CONFIG_RAW_DRIVER=y 874CONFIG_RAW_DRIVER=y
@@ -871,6 +905,7 @@ CONFIG_HPET_MMAP=y
871# 905#
872CONFIG_HWMON=y 906CONFIG_HWMON=y
873# CONFIG_HWMON_VID is not set 907# CONFIG_HWMON_VID is not set
908# CONFIG_SENSORS_F71805F is not set
874# CONFIG_SENSORS_HDAPS is not set 909# CONFIG_SENSORS_HDAPS is not set
875# CONFIG_HWMON_DEBUG_CHIP is not set 910# CONFIG_HWMON_DEBUG_CHIP is not set
876 911
@@ -880,10 +915,6 @@ CONFIG_HWMON=y
880# CONFIG_IBM_ASM is not set 915# CONFIG_IBM_ASM is not set
881 916
882# 917#
883# Multimedia Capabilities Port drivers
884#
885
886#
887# Multimedia devices 918# Multimedia devices
888# 919#
889# CONFIG_VIDEO_DEV is not set 920# CONFIG_VIDEO_DEV is not set
@@ -892,6 +923,7 @@ CONFIG_HWMON=y
892# Digital Video Broadcasting Devices 923# Digital Video Broadcasting Devices
893# 924#
894# CONFIG_DVB is not set 925# CONFIG_DVB is not set
926# CONFIG_USB_DABUSB is not set
895 927
896# 928#
897# Graphics support 929# Graphics support
@@ -903,6 +935,8 @@ CONFIG_VIDEO_SELECT=y
903# Console display driver support 935# Console display driver support
904# 936#
905CONFIG_VGA_CONSOLE=y 937CONFIG_VGA_CONSOLE=y
938CONFIG_VGACON_SOFT_SCROLLBACK=y
939CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256
906CONFIG_DUMMY_CONSOLE=y 940CONFIG_DUMMY_CONSOLE=y
907 941
908# 942#
@@ -947,6 +981,7 @@ CONFIG_SOUND_ICH=y
947# 981#
948CONFIG_USB_ARCH_HAS_HCD=y 982CONFIG_USB_ARCH_HAS_HCD=y
949CONFIG_USB_ARCH_HAS_OHCI=y 983CONFIG_USB_ARCH_HAS_OHCI=y
984CONFIG_USB_ARCH_HAS_EHCI=y
950CONFIG_USB=y 985CONFIG_USB=y
951# CONFIG_USB_DEBUG is not set 986# CONFIG_USB_DEBUG is not set
952 987
@@ -975,7 +1010,6 @@ CONFIG_USB_UHCI_HCD=y
975# 1010#
976# USB Device Class drivers 1011# USB Device Class drivers
977# 1012#
978# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
979# CONFIG_USB_ACM is not set 1013# CONFIG_USB_ACM is not set
980CONFIG_USB_PRINTER=y 1014CONFIG_USB_PRINTER=y
981 1015
@@ -1012,9 +1046,7 @@ CONFIG_USB_HIDINPUT=y
1012# CONFIG_USB_ACECAD is not set 1046# CONFIG_USB_ACECAD is not set
1013# CONFIG_USB_KBTAB is not set 1047# CONFIG_USB_KBTAB is not set
1014# CONFIG_USB_POWERMATE is not set 1048# CONFIG_USB_POWERMATE is not set
1015# CONFIG_USB_MTOUCH is not set 1049# CONFIG_USB_TOUCHSCREEN is not set
1016# CONFIG_USB_ITMTOUCH is not set
1017# CONFIG_USB_EGALAX is not set
1018# CONFIG_USB_YEALINK is not set 1050# CONFIG_USB_YEALINK is not set
1019# CONFIG_USB_XPAD is not set 1051# CONFIG_USB_XPAD is not set
1020# CONFIG_USB_ATI_REMOTE is not set 1052# CONFIG_USB_ATI_REMOTE is not set
@@ -1029,15 +1061,6 @@ CONFIG_USB_HIDINPUT=y
1029# CONFIG_USB_MICROTEK is not set 1061# CONFIG_USB_MICROTEK is not set
1030 1062
1031# 1063#
1032# USB Multimedia devices
1033#
1034# CONFIG_USB_DABUSB is not set
1035
1036#
1037# Video4Linux support is needed for USB Multimedia device support
1038#
1039
1040#
1041# USB Network Adapters 1064# USB Network Adapters
1042# 1065#
1043# CONFIG_USB_CATC is not set 1066# CONFIG_USB_CATC is not set
@@ -1089,19 +1112,33 @@ CONFIG_USB_MON=y
1089# CONFIG_MMC is not set 1112# CONFIG_MMC is not set
1090 1113
1091# 1114#
1115# LED devices
1116#
1117# CONFIG_NEW_LEDS is not set
1118
1119#
1120# LED drivers
1121#
1122
1123#
1124# LED Triggers
1125#
1126
1127#
1092# InfiniBand support 1128# InfiniBand support
1093# 1129#
1094# CONFIG_INFINIBAND is not set 1130# CONFIG_INFINIBAND is not set
1131# CONFIG_IPATH_CORE is not set
1095 1132
1096# 1133#
1097# SN Devices 1134# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
1098# 1135#
1136# CONFIG_EDAC is not set
1099 1137
1100# 1138#
1101# EDAC - error detection and reporting (RAS) 1139# Real Time Clock
1102# 1140#
1103# CONFIG_EDAC is not set 1141# CONFIG_RTC_CLASS is not set
1104# CONFIG_EDAC_POLL is not set
1105 1142
1106# 1143#
1107# Firmware Drivers 1144# Firmware Drivers
@@ -1172,7 +1209,6 @@ CONFIG_TMPFS=y
1172CONFIG_HUGETLBFS=y 1209CONFIG_HUGETLBFS=y
1173CONFIG_HUGETLB_PAGE=y 1210CONFIG_HUGETLB_PAGE=y
1174CONFIG_RAMFS=y 1211CONFIG_RAMFS=y
1175CONFIG_RELAYFS_FS=y
1176# CONFIG_CONFIGFS_FS is not set 1212# CONFIG_CONFIGFS_FS is not set
1177 1213
1178# 1214#
@@ -1295,10 +1331,9 @@ CONFIG_DETECT_SOFTLOCKUP=y
1295CONFIG_DEBUG_FS=y 1331CONFIG_DEBUG_FS=y
1296# CONFIG_DEBUG_VM is not set 1332# CONFIG_DEBUG_VM is not set
1297# CONFIG_FRAME_POINTER is not set 1333# CONFIG_FRAME_POINTER is not set
1298# CONFIG_FORCED_INLINING is not set
1299# CONFIG_UNWIND_INFO is not set 1334# CONFIG_UNWIND_INFO is not set
1335# CONFIG_FORCED_INLINING is not set
1300# CONFIG_RCU_TORTURE_TEST is not set 1336# CONFIG_RCU_TORTURE_TEST is not set
1301CONFIG_INIT_DEBUG=y
1302# CONFIG_DEBUG_RODATA is not set 1337# CONFIG_DEBUG_RODATA is not set
1303# CONFIG_IOMMU_DEBUG is not set 1338# CONFIG_IOMMU_DEBUG is not set
1304 1339
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
index 929e6b0771f8..e9263b4975e0 100644
--- a/arch/x86_64/ia32/Makefile
+++ b/arch/x86_64/ia32/Makefile
@@ -27,5 +27,5 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
27$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE 27$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
28 $(call if_changed,syscall) 28 $(call if_changed,syscall)
29 29
30AFLAGS_vsyscall-sysenter.o = -m32 30AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
31AFLAGS_vsyscall-syscall.o = -m32 31AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 572b3b28772d..926c4743d13b 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -58,7 +58,7 @@ struct elf_phdr;
58 58
59#define USE_ELF_CORE_DUMP 1 59#define USE_ELF_CORE_DUMP 1
60 60
61/* Overwrite elfcore.h */ 61/* Override elfcore.h */
62#define _LINUX_ELFCORE_H 1 62#define _LINUX_ELFCORE_H 1
63typedef unsigned int elf_greg_t; 63typedef unsigned int elf_greg_t;
64 64
@@ -339,7 +339,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
339 struct mm_struct *mm = current->mm; 339 struct mm_struct *mm = current->mm;
340 int i, ret; 340 int i, ret;
341 341
342 stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; 342 stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
343 mm->arg_start = bprm->p + stack_base; 343 mm->arg_start = bprm->p + stack_base;
344 344
345 bprm->p += stack_base; 345 bprm->p += stack_base;
@@ -357,7 +357,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
357 { 357 {
358 mpnt->vm_mm = mm; 358 mpnt->vm_mm = mm;
359 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; 359 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
360 mpnt->vm_end = IA32_STACK_TOP; 360 mpnt->vm_end = stack_top;
361 if (executable_stack == EXSTACK_ENABLE_X) 361 if (executable_stack == EXSTACK_ENABLE_X)
362 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; 362 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
363 else if (executable_stack == EXSTACK_DISABLE_X) 363 else if (executable_stack == EXSTACK_DISABLE_X)
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index ada4535d0161..5a92fed2d1d5 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -15,6 +15,8 @@
15#include <asm/vsyscall32.h> 15#include <asm/vsyscall32.h>
16#include <linux/linkage.h> 16#include <linux/linkage.h>
17 17
18#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
19
18 .macro IA32_ARG_FIXUP noebp=0 20 .macro IA32_ARG_FIXUP noebp=0
19 movl %edi,%r8d 21 movl %edi,%r8d
20 .if \noebp 22 .if \noebp
@@ -109,8 +111,8 @@ ENTRY(ia32_sysenter_target)
109 CFI_REMEMBER_STATE 111 CFI_REMEMBER_STATE
110 jnz sysenter_tracesys 112 jnz sysenter_tracesys
111sysenter_do_call: 113sysenter_do_call:
112 cmpl $(IA32_NR_syscalls),%eax 114 cmpl $(IA32_NR_syscalls-1),%eax
113 jae ia32_badsys 115 ja ia32_badsys
114 IA32_ARG_FIXUP 1 116 IA32_ARG_FIXUP 1
115 call *ia32_sys_call_table(,%rax,8) 117 call *ia32_sys_call_table(,%rax,8)
116 movq %rax,RAX-ARGOFFSET(%rsp) 118 movq %rax,RAX-ARGOFFSET(%rsp)
@@ -210,8 +212,8 @@ ENTRY(ia32_cstar_target)
210 CFI_REMEMBER_STATE 212 CFI_REMEMBER_STATE
211 jnz cstar_tracesys 213 jnz cstar_tracesys
212cstar_do_call: 214cstar_do_call:
213 cmpl $IA32_NR_syscalls,%eax 215 cmpl $IA32_NR_syscalls-1,%eax
214 jae ia32_badsys 216 ja ia32_badsys
215 IA32_ARG_FIXUP 1 217 IA32_ARG_FIXUP 1
216 call *ia32_sys_call_table(,%rax,8) 218 call *ia32_sys_call_table(,%rax,8)
217 movq %rax,RAX-ARGOFFSET(%rsp) 219 movq %rax,RAX-ARGOFFSET(%rsp)
@@ -296,8 +298,8 @@ ENTRY(ia32_syscall)
296 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) 298 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
297 jnz ia32_tracesys 299 jnz ia32_tracesys
298ia32_do_syscall: 300ia32_do_syscall:
299 cmpl $(IA32_NR_syscalls),%eax 301 cmpl $(IA32_NR_syscalls-1),%eax
300 jae ia32_badsys 302 ja ia32_badsys
301 IA32_ARG_FIXUP 303 IA32_ARG_FIXUP
302 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative 304 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
303ia32_sysret: 305ia32_sysret:
@@ -501,7 +503,7 @@ ia32_sys_call_table:
501 .quad sys_setdomainname 503 .quad sys_setdomainname
502 .quad sys_uname 504 .quad sys_uname
503 .quad sys_modify_ldt 505 .quad sys_modify_ldt
504 .quad sys32_adjtimex 506 .quad compat_sys_adjtimex
505 .quad sys32_mprotect /* 125 */ 507 .quad sys32_mprotect /* 125 */
506 .quad compat_sys_sigprocmask 508 .quad compat_sys_sigprocmask
507 .quad quiet_ni_syscall /* create_module */ 509 .quad quiet_ni_syscall /* create_module */
@@ -677,7 +679,7 @@ ia32_sys_call_table:
677 .quad sys_mknodat 679 .quad sys_mknodat
678 .quad sys_fchownat 680 .quad sys_fchownat
679 .quad compat_sys_futimesat 681 .quad compat_sys_futimesat
680 .quad compat_sys_newfstatat /* 300 */ 682 .quad sys32_fstatat /* 300 */
681 .quad sys_unlinkat 683 .quad sys_unlinkat
682 .quad sys_renameat 684 .quad sys_renameat
683 .quad sys_linkat 685 .quad sys_linkat
@@ -685,10 +687,13 @@ ia32_sys_call_table:
685 .quad sys_readlinkat /* 305 */ 687 .quad sys_readlinkat /* 305 */
686 .quad sys_fchmodat 688 .quad sys_fchmodat
687 .quad sys_faccessat 689 .quad sys_faccessat
688 .quad sys_ni_syscall /* pselect6 for now */ 690 .quad quiet_ni_syscall /* pselect6 for now */
689 .quad sys_ni_syscall /* ppoll for now */ 691 .quad quiet_ni_syscall /* ppoll for now */
690 .quad sys_unshare /* 310 */ 692 .quad sys_unshare /* 310 */
693 .quad compat_sys_set_robust_list
694 .quad compat_sys_get_robust_list
695 .quad sys_splice
696 .quad sys_sync_file_range
697 .quad sys_tee
698 .quad compat_sys_vmsplice
691ia32_syscall_end: 699ia32_syscall_end:
692 .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
693 .quad ni_syscall
694 .endr
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index 54481af5344a..f182b20858e2 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -30,7 +30,6 @@
30#include <linux/resource.h> 30#include <linux/resource.h>
31#include <linux/times.h> 31#include <linux/times.h>
32#include <linux/utsname.h> 32#include <linux/utsname.h>
33#include <linux/timex.h>
34#include <linux/smp.h> 33#include <linux/smp.h>
35#include <linux/smp_lock.h> 34#include <linux/smp_lock.h>
36#include <linux/sem.h> 35#include <linux/sem.h>
@@ -180,6 +179,28 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
180 return ret; 179 return ret;
181} 180}
182 181
182asmlinkage long
183sys32_fstatat(unsigned int dfd, char __user *filename,
184 struct stat64 __user* statbuf, int flag)
185{
186 struct kstat stat;
187 int error = -EINVAL;
188
189 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
190 goto out;
191
192 if (flag & AT_SYMLINK_NOFOLLOW)
193 error = vfs_lstat_fd(dfd, filename, &stat);
194 else
195 error = vfs_stat_fd(dfd, filename, &stat);
196
197 if (!error)
198 error = cp_stat64(statbuf, &stat);
199
200out:
201 return error;
202}
203
183/* 204/*
184 * Linux/i386 didn't use to be able to handle more than 205 * Linux/i386 didn't use to be able to handle more than
185 * 4 system call parameters, so these system calls used a memory 206 * 4 system call parameters, so these system calls used a memory
@@ -408,24 +429,12 @@ put_tv32(struct compat_timeval __user *o, struct timeval *i)
408 return err; 429 return err;
409} 430}
410 431
411extern int do_setitimer(int which, struct itimerval *, struct itimerval *); 432extern unsigned int alarm_setitimer(unsigned int seconds);
412 433
413asmlinkage long 434asmlinkage long
414sys32_alarm(unsigned int seconds) 435sys32_alarm(unsigned int seconds)
415{ 436{
416 struct itimerval it_new, it_old; 437 return alarm_setitimer(seconds);
417 unsigned int oldalarm;
418
419 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
420 it_new.it_value.tv_sec = seconds;
421 it_new.it_value.tv_usec = 0;
422 do_setitimer(ITIMER_REAL, &it_new, &it_old);
423 oldalarm = it_old.it_value.tv_sec;
424 /* ehhh.. We can't return 0 if we have an alarm pending.. */
425 /* And we'd better return too much than too little anyway */
426 if (it_old.it_value.tv_usec)
427 oldalarm++;
428 return oldalarm;
429} 438}
430 439
431/* Translations due to time_t size differences. Which affects all 440/* Translations due to time_t size differences. Which affects all
@@ -757,82 +766,6 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
757 return ret; 766 return ret;
758} 767}
759 768
760/* Handle adjtimex compatibility. */
761
762struct timex32 {
763 u32 modes;
764 s32 offset, freq, maxerror, esterror;
765 s32 status, constant, precision, tolerance;
766 struct compat_timeval time;
767 s32 tick;
768 s32 ppsfreq, jitter, shift, stabil;
769 s32 jitcnt, calcnt, errcnt, stbcnt;
770 s32 :32; s32 :32; s32 :32; s32 :32;
771 s32 :32; s32 :32; s32 :32; s32 :32;
772 s32 :32; s32 :32; s32 :32; s32 :32;
773};
774
775extern int do_adjtimex(struct timex *);
776
777asmlinkage long
778sys32_adjtimex(struct timex32 __user *utp)
779{
780 struct timex txc;
781 int ret;
782
783 memset(&txc, 0, sizeof(struct timex));
784
785 if (!access_ok(VERIFY_READ, utp, sizeof(struct timex32)) ||
786 __get_user(txc.modes, &utp->modes) ||
787 __get_user(txc.offset, &utp->offset) ||
788 __get_user(txc.freq, &utp->freq) ||
789 __get_user(txc.maxerror, &utp->maxerror) ||
790 __get_user(txc.esterror, &utp->esterror) ||
791 __get_user(txc.status, &utp->status) ||
792 __get_user(txc.constant, &utp->constant) ||
793 __get_user(txc.precision, &utp->precision) ||
794 __get_user(txc.tolerance, &utp->tolerance) ||
795 __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
796 __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
797 __get_user(txc.tick, &utp->tick) ||
798 __get_user(txc.ppsfreq, &utp->ppsfreq) ||
799 __get_user(txc.jitter, &utp->jitter) ||
800 __get_user(txc.shift, &utp->shift) ||
801 __get_user(txc.stabil, &utp->stabil) ||
802 __get_user(txc.jitcnt, &utp->jitcnt) ||
803 __get_user(txc.calcnt, &utp->calcnt) ||
804 __get_user(txc.errcnt, &utp->errcnt) ||
805 __get_user(txc.stbcnt, &utp->stbcnt))
806 return -EFAULT;
807
808 ret = do_adjtimex(&txc);
809
810 if (!access_ok(VERIFY_WRITE, utp, sizeof(struct timex32)) ||
811 __put_user(txc.modes, &utp->modes) ||
812 __put_user(txc.offset, &utp->offset) ||
813 __put_user(txc.freq, &utp->freq) ||
814 __put_user(txc.maxerror, &utp->maxerror) ||
815 __put_user(txc.esterror, &utp->esterror) ||
816 __put_user(txc.status, &utp->status) ||
817 __put_user(txc.constant, &utp->constant) ||
818 __put_user(txc.precision, &utp->precision) ||
819 __put_user(txc.tolerance, &utp->tolerance) ||
820 __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
821 __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
822 __put_user(txc.tick, &utp->tick) ||
823 __put_user(txc.ppsfreq, &utp->ppsfreq) ||
824 __put_user(txc.jitter, &utp->jitter) ||
825 __put_user(txc.shift, &utp->shift) ||
826 __put_user(txc.stabil, &utp->stabil) ||
827 __put_user(txc.jitcnt, &utp->jitcnt) ||
828 __put_user(txc.calcnt, &utp->calcnt) ||
829 __put_user(txc.errcnt, &utp->errcnt) ||
830 __put_user(txc.stbcnt, &utp->stbcnt))
831 ret = -EFAULT;
832
833 return ret;
834}
835
836asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, 769asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
837 unsigned long prot, unsigned long flags, 770 unsigned long prot, unsigned long flags,
838 unsigned long fd, unsigned long pgoff) 771 unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86_64/ia32/vsyscall-sigreturn.S b/arch/x86_64/ia32/vsyscall-sigreturn.S
index d90321fe9bba..1384367cdbe1 100644
--- a/arch/x86_64/ia32/vsyscall-sigreturn.S
+++ b/arch/x86_64/ia32/vsyscall-sigreturn.S
@@ -32,9 +32,28 @@ __kernel_rt_sigreturn:
32 .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn 32 .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
33 33
34 .section .eh_frame,"a",@progbits 34 .section .eh_frame,"a",@progbits
35.LSTARTFRAMES:
36 .long .LENDCIES-.LSTARTCIES
37.LSTARTCIES:
38 .long 0 /* CIE ID */
39 .byte 1 /* Version number */
40 .string "zRS" /* NUL-terminated augmentation string */
41 .uleb128 1 /* Code alignment factor */
42 .sleb128 -4 /* Data alignment factor */
43 .byte 8 /* Return address register column */
44 .uleb128 1 /* Augmentation value length */
45 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
46 .byte 0x0c /* DW_CFA_def_cfa */
47 .uleb128 4
48 .uleb128 4
49 .byte 0x88 /* DW_CFA_offset, column 0x8 */
50 .uleb128 1
51 .align 4
52.LENDCIES:
53
35 .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ 54 .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
36.LSTARTFDE2: 55.LSTARTFDE2:
37 .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */ 56 .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */
38 /* HACK: The dwarf2 unwind routines will subtract 1 from the 57 /* HACK: The dwarf2 unwind routines will subtract 1 from the
39 return address to get an address in the middle of the 58 return address to get an address in the middle of the
40 presumed call instruction. Since we didn't get here via 59 presumed call instruction. Since we didn't get here via
@@ -97,7 +116,7 @@ __kernel_rt_sigreturn:
97 116
98 .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ 117 .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
99.LSTARTFDE3: 118.LSTARTFDE3:
100 .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */ 119 .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */
101 /* HACK: See above wrt unwind library assumptions. */ 120 /* HACK: See above wrt unwind library assumptions. */
102 .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ 121 .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
103 .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 122 .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 72fe60c20d39..059c88313f4e 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 dmi_scan.o pci-dma.o pci-nommu.o 11 pci-dma.o pci-nommu.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -43,11 +43,9 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0
43 43
44bootflag-y += ../../i386/kernel/bootflag.o 44bootflag-y += ../../i386/kernel/bootflag.o
45cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o 45cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
46topology-y += ../../i386/mach-default/topology.o 46topology-y += ../../i386/kernel/topology.o
47microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o 47microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
48intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o 48intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
49quirks-y += ../../i386/kernel/quirks.o 49quirks-y += ../../i386/kernel/quirks.o
50i8237-y += ../../i386/kernel/i8237.o 50i8237-y += ../../i386/kernel/i8237.o
51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
52dmi_scan-y += ../../i386/kernel/dmi_scan.o
53
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index e4e2b7d01f89..70b9d21ed675 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -60,7 +60,7 @@ static u32 __init allocate_aperture(void)
60 printk("Cannot allocate aperture memory hole (%p,%uK)\n", 60 printk("Cannot allocate aperture memory hole (%p,%uK)\n",
61 p, aper_size>>10); 61 p, aper_size>>10);
62 if (p) 62 if (p)
63 free_bootmem_node(nd0, (unsigned long)p, aper_size); 63 free_bootmem_node(nd0, __pa(p), aper_size);
64 return 0; 64 return 0;
65 } 65 }
66 printk("Mapping aperture over %d KB of RAM @ %lx\n", 66 printk("Mapping aperture over %d KB of RAM @ %lx\n",
@@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
81 return 0; 81 return 0;
82 } 82 }
83 if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
85 return 0; 85 return 0;
86 } 86 }
@@ -161,7 +161,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
161 int num, slot, func; 161 int num, slot, func;
162 162
163 /* Poor man's PCI discovery */ 163 /* Poor man's PCI discovery */
164 for (num = 0; num < 32; num++) { 164 for (num = 0; num < 256; num++) {
165 for (slot = 0; slot < 32; slot++) { 165 for (slot = 0; slot < 32; slot++) {
166 for (func = 0; func < 8; func++) { 166 for (func = 0; func < 8; func++) {
167 u32 class, cap; 167 u32 class, cap;
@@ -248,7 +248,7 @@ void __init iommu_hole_init(void)
248 /* Got the aperture from the AGP bridge */ 248 /* Got the aperture from the AGP bridge */
249 } else if (swiotlb && !valid_agp) { 249 } else if (swiotlb && !valid_agp) {
250 /* Do nothing */ 250 /* Do nothing */
251 } else if ((!no_iommu && end_pfn >= MAX_DMA32_PFN) || 251 } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
252 force_iommu || 252 force_iommu ||
253 valid_agp || 253 valid_agp ||
254 fallback_aper_force) { 254 fallback_aper_force) {
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 6147770b4347..100a30c40044 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -342,6 +342,7 @@ void __init init_bsp_APIC(void)
342void __cpuinit setup_local_APIC (void) 342void __cpuinit setup_local_APIC (void)
343{ 343{
344 unsigned int value, maxlvt; 344 unsigned int value, maxlvt;
345 int i, j;
345 346
346 value = apic_read(APIC_LVR); 347 value = apic_read(APIC_LVR);
347 348
@@ -371,6 +372,25 @@ void __cpuinit setup_local_APIC (void)
371 apic_write(APIC_TASKPRI, value); 372 apic_write(APIC_TASKPRI, value);
372 373
373 /* 374 /*
375 * After a crash, we no longer service the interrupts and a pending
376 * interrupt from previous kernel might still have ISR bit set.
377 *
378 * Most probably by now CPU has serviced that pending interrupt and
379 * it might not have done the ack_APIC_irq() because it thought,
380 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
381 * does not clear the ISR bit and cpu thinks it has already serviced
382 * the interrupt. Hence a vector might get locked. It was noticed
383 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
384 */
385 for (i = APIC_ISR_NR - 1; i >= 0; i--) {
386 value = apic_read(APIC_ISR + i*0x10);
387 for (j = 31; j >= 0; j--) {
388 if (value & (1<<j))
389 ack_APIC_irq();
390 }
391 }
392
393 /*
374 * Now that we are all set up, enable the APIC 394 * Now that we are all set up, enable the APIC
375 */ 395 */
376 value = apic_read(APIC_SPIV); 396 value = apic_read(APIC_SPIV);
@@ -595,7 +615,7 @@ static int __init apic_set_verbosity(char *str)
595 printk(KERN_WARNING "APIC Verbosity level %s not recognised" 615 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
596 " use apic=verbose or apic=debug", str); 616 " use apic=verbose or apic=debug", str);
597 617
598 return 0; 618 return 1;
599} 619}
600 620
601__setup("apic=", apic_set_verbosity); 621__setup("apic=", apic_set_verbosity);
@@ -708,7 +728,7 @@ static void setup_APIC_timer(unsigned int clocks)
708 local_irq_save(flags); 728 local_irq_save(flags);
709 729
710 /* wait for irq slice */ 730 /* wait for irq slice */
711 if (vxtime.hpet_address) { 731 if (vxtime.hpet_address && hpet_use_timer) {
712 int trigger = hpet_readl(HPET_T0_CMP); 732 int trigger = hpet_readl(HPET_T0_CMP);
713 while (hpet_readl(HPET_COUNTER) >= trigger) 733 while (hpet_readl(HPET_COUNTER) >= trigger)
714 /* do nothing */ ; 734 /* do nothing */ ;
@@ -1117,41 +1137,42 @@ int __init APIC_init_uniprocessor (void)
1117static __init int setup_disableapic(char *str) 1137static __init int setup_disableapic(char *str)
1118{ 1138{
1119 disable_apic = 1; 1139 disable_apic = 1;
1120 return 0; 1140 return 1;
1121} 1141}
1122 1142
1123static __init int setup_nolapic(char *str) 1143static __init int setup_nolapic(char *str)
1124{ 1144{
1125 disable_apic = 1; 1145 disable_apic = 1;
1126 return 0; 1146 return 1;
1127} 1147}
1128 1148
1129static __init int setup_noapictimer(char *str) 1149static __init int setup_noapictimer(char *str)
1130{ 1150{
1131 if (str[0] != ' ' && str[0] != 0) 1151 if (str[0] != ' ' && str[0] != 0)
1132 return -1; 1152 return 0;
1133 disable_apic_timer = 1; 1153 disable_apic_timer = 1;
1134 return 0; 1154 return 1;
1135} 1155}
1136 1156
1137static __init int setup_apicmaintimer(char *str) 1157static __init int setup_apicmaintimer(char *str)
1138{ 1158{
1139 apic_runs_main_timer = 1; 1159 apic_runs_main_timer = 1;
1140 nohpet = 1; 1160 nohpet = 1;
1141 return 0; 1161 return 1;
1142} 1162}
1143__setup("apicmaintimer", setup_apicmaintimer); 1163__setup("apicmaintimer", setup_apicmaintimer);
1144 1164
1145static __init int setup_noapicmaintimer(char *str) 1165static __init int setup_noapicmaintimer(char *str)
1146{ 1166{
1147 apic_runs_main_timer = -1; 1167 apic_runs_main_timer = -1;
1148 return 0; 1168 return 1;
1149} 1169}
1150__setup("noapicmaintimer", setup_noapicmaintimer); 1170__setup("noapicmaintimer", setup_noapicmaintimer);
1151 1171
1152static __init int setup_apicpmtimer(char *s) 1172static __init int setup_apicpmtimer(char *s)
1153{ 1173{
1154 apic_calibrate_pmtmr = 1; 1174 apic_calibrate_pmtmr = 1;
1175 notsc_setup(NULL);
1155 return setup_apicmaintimer(NULL); 1176 return setup_apicmaintimer(NULL);
1156} 1177}
1157__setup("apicpmtimer", setup_apicpmtimer); 1178__setup("apicpmtimer", setup_apicpmtimer);
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 293cd71a266a..1ef6028f721e 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -76,11 +76,22 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
76 *addrp = __pa_symbol(&_end); 76 *addrp = __pa_symbol(&_end);
77 return 1; 77 return 1;
78 } 78 }
79
80 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
81 *addrp = ebda_addr + ebda_size;
82 return 1;
83 }
84
79 /* XXX ramdisk image here? */ 85 /* XXX ramdisk image here? */
80 return 0; 86 return 0;
81} 87}
82 88
83int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) 89/*
90 * This function checks if any part of the range <start,end> is mapped
91 * with type.
92 */
93int __meminit
94e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
84{ 95{
85 int i; 96 int i;
86 for (i = 0; i < e820.nr_map; i++) { 97 for (i = 0; i < e820.nr_map; i++) {
@@ -94,6 +105,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
94 return 0; 105 return 0;
95} 106}
96 107
108/*
109 * This function checks if the entire range <start,end> is mapped with type.
110 *
111 * Note: this function only works correctly if the e820 table is sorted and
112 * not-overlapping, which is the case
113 */
114int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
115{
116 int i;
117 for (i = 0; i < e820.nr_map; i++) {
118 struct e820entry *ei = &e820.map[i];
119 if (type && ei->type != type)
120 continue;
121 /* is the region (part) in overlap with the current region ?*/
122 if (ei->addr >= end || ei->addr + ei->size <= start)
123 continue;
124
125 /* if the region is at the beginning of <start,end> we move
126 * start to the end of the region since it's ok until there
127 */
128 if (ei->addr <= start)
129 start = ei->addr + ei->size;
130 /* if start is now at or beyond end, we're done, full coverage */
131 if (start >= end)
132 return 1; /* we're done */
133 }
134 return 0;
135}
136
97/* 137/*
98 * Find a free area in a specific range. 138 * Find a free area in a specific range.
99 */ 139 */
@@ -109,7 +149,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
109 addr = start; 149 addr = start;
110 if (addr > ei->addr + ei->size) 150 if (addr > ei->addr + ei->size)
111 continue; 151 continue;
112 while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size) 152 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
113 ; 153 ;
114 last = addr + size; 154 last = addr + size;
115 if (last > ei->addr + ei->size) 155 if (last > ei->addr + ei->size)
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 6dffb498ccd7..b93ef5b51980 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -17,11 +17,8 @@
17#define VGABASE ((void __iomem *)0xffffffff800b8000UL) 17#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
18#endif 18#endif
19 19
20#define MAX_YPOS max_ypos
21#define MAX_XPOS max_xpos
22
23static int max_ypos = 25, max_xpos = 80; 20static int max_ypos = 25, max_xpos = 80;
24static int current_ypos = 1, current_xpos = 0; 21static int current_ypos = 25, current_xpos = 0;
25 22
26static void early_vga_write(struct console *con, const char *str, unsigned n) 23static void early_vga_write(struct console *con, const char *str, unsigned n)
27{ 24{
@@ -29,26 +26,26 @@ static void early_vga_write(struct console *con, const char *str, unsigned n)
29 int i, k, j; 26 int i, k, j;
30 27
31 while ((c = *str++) != '\0' && n-- > 0) { 28 while ((c = *str++) != '\0' && n-- > 0) {
32 if (current_ypos >= MAX_YPOS) { 29 if (current_ypos >= max_ypos) {
33 /* scroll 1 line up */ 30 /* scroll 1 line up */
34 for (k = 1, j = 0; k < MAX_YPOS; k++, j++) { 31 for (k = 1, j = 0; k < max_ypos; k++, j++) {
35 for (i = 0; i < MAX_XPOS; i++) { 32 for (i = 0; i < max_xpos; i++) {
36 writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), 33 writew(readw(VGABASE+2*(max_xpos*k+i)),
37 VGABASE + 2*(MAX_XPOS*j + i)); 34 VGABASE + 2*(max_xpos*j + i));
38 } 35 }
39 } 36 }
40 for (i = 0; i < MAX_XPOS; i++) 37 for (i = 0; i < max_xpos; i++)
41 writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); 38 writew(0x720, VGABASE + 2*(max_xpos*j + i));
42 current_ypos = MAX_YPOS-1; 39 current_ypos = max_ypos-1;
43 } 40 }
44 if (c == '\n') { 41 if (c == '\n') {
45 current_xpos = 0; 42 current_xpos = 0;
46 current_ypos++; 43 current_ypos++;
47 } else if (c != '\r') { 44 } else if (c != '\r') {
48 writew(((0x7 << 8) | (unsigned short) c), 45 writew(((0x7 << 8) | (unsigned short) c),
49 VGABASE + 2*(MAX_XPOS*current_ypos + 46 VGABASE + 2*(max_xpos*current_ypos +
50 current_xpos++)); 47 current_xpos++));
51 if (current_xpos >= MAX_XPOS) { 48 if (current_xpos >= max_xpos) {
52 current_xpos = 0; 49 current_xpos = 0;
53 current_ypos++; 50 current_ypos++;
54 } 51 }
@@ -63,7 +60,7 @@ static struct console early_vga_console = {
63 .index = -1, 60 .index = -1,
64}; 61};
65 62
66/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ 63/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
67 64
68static int early_serial_base = 0x3f8; /* ttyS0 */ 65static int early_serial_base = 0x3f8; /* ttyS0 */
69 66
@@ -83,30 +80,30 @@ static int early_serial_base = 0x3f8; /* ttyS0 */
83#define DLL 0 /* Divisor Latch Low */ 80#define DLL 0 /* Divisor Latch Low */
84#define DLH 1 /* Divisor latch High */ 81#define DLH 1 /* Divisor latch High */
85 82
86static int early_serial_putc(unsigned char ch) 83static int early_serial_putc(unsigned char ch)
87{ 84{
88 unsigned timeout = 0xffff; 85 unsigned timeout = 0xffff;
89 while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) 86 while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
90 cpu_relax(); 87 cpu_relax();
91 outb(ch, early_serial_base + TXR); 88 outb(ch, early_serial_base + TXR);
92 return timeout ? 0 : -1; 89 return timeout ? 0 : -1;
93} 90}
94 91
95static void early_serial_write(struct console *con, const char *s, unsigned n) 92static void early_serial_write(struct console *con, const char *s, unsigned n)
96{ 93{
97 while (*s && n-- > 0) { 94 while (*s && n-- > 0) {
98 early_serial_putc(*s); 95 early_serial_putc(*s);
99 if (*s == '\n') 96 if (*s == '\n')
100 early_serial_putc('\r'); 97 early_serial_putc('\r');
101 s++; 98 s++;
102 } 99 }
103} 100}
104 101
105#define DEFAULT_BAUD 9600 102#define DEFAULT_BAUD 9600
106 103
107static __init void early_serial_init(char *s) 104static __init void early_serial_init(char *s)
108{ 105{
109 unsigned char c; 106 unsigned char c;
110 unsigned divisor; 107 unsigned divisor;
111 unsigned baud = DEFAULT_BAUD; 108 unsigned baud = DEFAULT_BAUD;
112 char *e; 109 char *e;
@@ -115,7 +112,7 @@ static __init void early_serial_init(char *s)
115 ++s; 112 ++s;
116 113
117 if (*s) { 114 if (*s) {
118 unsigned port; 115 unsigned port;
119 if (!strncmp(s,"0x",2)) { 116 if (!strncmp(s,"0x",2)) {
120 early_serial_base = simple_strtoul(s, &e, 16); 117 early_serial_base = simple_strtoul(s, &e, 16);
121 } else { 118 } else {
@@ -139,16 +136,16 @@ static __init void early_serial_init(char *s)
139 outb(0x3, early_serial_base + MCR); /* DTR + RTS */ 136 outb(0x3, early_serial_base + MCR); /* DTR + RTS */
140 137
141 if (*s) { 138 if (*s) {
142 baud = simple_strtoul(s, &e, 0); 139 baud = simple_strtoul(s, &e, 0);
143 if (baud == 0 || s == e) 140 if (baud == 0 || s == e)
144 baud = DEFAULT_BAUD; 141 baud = DEFAULT_BAUD;
145 } 142 }
146 143
147 divisor = 115200 / baud; 144 divisor = 115200 / baud;
148 c = inb(early_serial_base + LCR); 145 c = inb(early_serial_base + LCR);
149 outb(c | DLAB, early_serial_base + LCR); 146 outb(c | DLAB, early_serial_base + LCR);
150 outb(divisor & 0xff, early_serial_base + DLL); 147 outb(divisor & 0xff, early_serial_base + DLL);
151 outb((divisor >> 8) & 0xff, early_serial_base + DLH); 148 outb((divisor >> 8) & 0xff, early_serial_base + DLH);
152 outb(c & ~DLAB, early_serial_base + LCR); 149 outb(c & ~DLAB, early_serial_base + LCR);
153} 150}
154 151
@@ -205,67 +202,68 @@ struct console *early_console = &early_vga_console;
205static int early_console_initialized = 0; 202static int early_console_initialized = 0;
206 203
207void early_printk(const char *fmt, ...) 204void early_printk(const char *fmt, ...)
208{ 205{
209 char buf[512]; 206 char buf[512];
210 int n; 207 int n;
211 va_list ap; 208 va_list ap;
212 209
213 va_start(ap,fmt); 210 va_start(ap,fmt);
214 n = vscnprintf(buf,512,fmt,ap); 211 n = vscnprintf(buf,512,fmt,ap);
215 early_console->write(early_console,buf,n); 212 early_console->write(early_console,buf,n);
216 va_end(ap); 213 va_end(ap);
217} 214}
218 215
219static int __initdata keep_early; 216static int __initdata keep_early;
220 217
221int __init setup_early_printk(char *opt) 218int __init setup_early_printk(char *opt)
222{ 219{
223 char *space; 220 char *space;
224 char buf[256]; 221 char buf[256];
225 222
226 if (early_console_initialized) 223 if (early_console_initialized)
227 return -1; 224 return 1;
228 225
229 strlcpy(buf,opt,sizeof(buf)); 226 strlcpy(buf,opt,sizeof(buf));
230 space = strchr(buf, ' '); 227 space = strchr(buf, ' ');
231 if (space) 228 if (space)
232 *space = 0; 229 *space = 0;
233 230
234 if (strstr(buf,"keep")) 231 if (strstr(buf,"keep"))
235 keep_early = 1; 232 keep_early = 1;
236 233
237 if (!strncmp(buf, "serial", 6)) { 234 if (!strncmp(buf, "serial", 6)) {
238 early_serial_init(buf + 6); 235 early_serial_init(buf + 6);
239 early_console = &early_serial_console; 236 early_console = &early_serial_console;
240 } else if (!strncmp(buf, "ttyS", 4)) { 237 } else if (!strncmp(buf, "ttyS", 4)) {
241 early_serial_init(buf); 238 early_serial_init(buf);
242 early_console = &early_serial_console; 239 early_console = &early_serial_console;
243 } else if (!strncmp(buf, "vga", 3) 240 } else if (!strncmp(buf, "vga", 3)
244 && SCREEN_INFO.orig_video_isVGA == 1) { 241 && SCREEN_INFO.orig_video_isVGA == 1) {
245 max_xpos = SCREEN_INFO.orig_video_cols; 242 max_xpos = SCREEN_INFO.orig_video_cols;
246 max_ypos = SCREEN_INFO.orig_video_lines; 243 max_ypos = SCREEN_INFO.orig_video_lines;
247 early_console = &early_vga_console; 244 current_ypos = SCREEN_INFO.orig_y;
245 early_console = &early_vga_console;
248 } else if (!strncmp(buf, "simnow", 6)) { 246 } else if (!strncmp(buf, "simnow", 6)) {
249 simnow_init(buf + 6); 247 simnow_init(buf + 6);
250 early_console = &simnow_console; 248 early_console = &simnow_console;
251 keep_early = 1; 249 keep_early = 1;
252 } 250 }
253 early_console_initialized = 1; 251 early_console_initialized = 1;
254 register_console(early_console); 252 register_console(early_console);
255 return 0; 253 return 0;
256} 254}
257 255
258void __init disable_early_printk(void) 256void __init disable_early_printk(void)
259{ 257{
260 if (!early_console_initialized || !early_console) 258 if (!early_console_initialized || !early_console)
261 return; 259 return;
262 if (!keep_early) { 260 if (!keep_early) {
263 printk("disabling early console\n"); 261 printk("disabling early console\n");
264 unregister_console(early_console); 262 unregister_console(early_console);
265 early_console_initialized = 0; 263 early_console_initialized = 0;
266 } else { 264 } else {
267 printk("keeping early console\n"); 265 printk("keeping early console\n");
268 } 266 }
269} 267}
270 268
271__setup("earlyprintk=", setup_early_printk); 269__setup("earlyprintk=", setup_early_printk);
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index b150c87a08c6..586b34c00c48 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -180,6 +180,10 @@ rff_trace:
180 * 180 *
181 * XXX if we had a free scratch register we could save the RSP into the stack frame 181 * XXX if we had a free scratch register we could save the RSP into the stack frame
182 * and report it properly in ps. Unfortunately we haven't. 182 * and report it properly in ps. Unfortunately we haven't.
183 *
184 * When user can change the frames always force IRET. That is because
185 * it deals with uncanonical addresses better. SYSRET has trouble
186 * with them due to bugs in both AMD and Intel CPUs.
183 */ 187 */
184 188
185ENTRY(system_call) 189ENTRY(system_call)
@@ -254,7 +258,10 @@ sysret_signal:
254 xorl %esi,%esi # oldset -> arg2 258 xorl %esi,%esi # oldset -> arg2
255 call ptregscall_common 259 call ptregscall_common
2561: movl $_TIF_NEED_RESCHED,%edi 2601: movl $_TIF_NEED_RESCHED,%edi
257 jmp sysret_check 261 /* Use IRET because user could have changed frame. This
262 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
263 cli
264 jmp int_with_check
258 265
259badsys: 266badsys:
260 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 267 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -274,13 +281,9 @@ tracesys:
274 ja 1f 281 ja 1f
275 movq %r10,%rcx /* fixup for C */ 282 movq %r10,%rcx /* fixup for C */
276 call *sys_call_table(,%rax,8) 283 call *sys_call_table(,%rax,8)
277 movq %rax,RAX-ARGOFFSET(%rsp) 2841: movq %rax,RAX-ARGOFFSET(%rsp)
2781: SAVE_REST 285 /* Use IRET because user could have changed frame */
279 movq %rsp,%rdi 286 jmp int_ret_from_sys_call
280 call syscall_trace_leave
281 RESTORE_TOP_OF_STACK %rbx
282 RESTORE_REST
283 jmp ret_from_sys_call
284 CFI_ENDPROC 287 CFI_ENDPROC
285 288
286/* 289/*
@@ -408,25 +411,9 @@ ENTRY(stub_execve)
408 CFI_ADJUST_CFA_OFFSET -8 411 CFI_ADJUST_CFA_OFFSET -8
409 CFI_REGISTER rip, r11 412 CFI_REGISTER rip, r11
410 SAVE_REST 413 SAVE_REST
411 movq %r11, %r15
412 CFI_REGISTER rip, r15
413 FIXUP_TOP_OF_STACK %r11 414 FIXUP_TOP_OF_STACK %r11
414 call sys_execve 415 call sys_execve
415 GET_THREAD_INFO(%rcx)
416 bt $TIF_IA32,threadinfo_flags(%rcx)
417 CFI_REMEMBER_STATE
418 jc exec_32bit
419 RESTORE_TOP_OF_STACK %r11 416 RESTORE_TOP_OF_STACK %r11
420 movq %r15, %r11
421 CFI_REGISTER rip, r11
422 RESTORE_REST
423 pushq %r11
424 CFI_ADJUST_CFA_OFFSET 8
425 CFI_REL_OFFSET rip, 0
426 ret
427
428exec_32bit:
429 CFI_RESTORE_STATE
430 movq %rax,RAX(%rsp) 417 movq %rax,RAX(%rsp)
431 RESTORE_REST 418 RESTORE_REST
432 jmp int_ret_from_sys_call 419 jmp int_ret_from_sys_call
@@ -553,7 +540,8 @@ iret_label:
553 /* force a signal here? this matches i386 behaviour */ 540 /* force a signal here? this matches i386 behaviour */
554 /* running with kernel gs */ 541 /* running with kernel gs */
555bad_iret: 542bad_iret:
556 movq $-9999,%rdi /* better code? */ 543 movq $11,%rdi /* SIGSEGV */
544 sti
557 jmp do_exit 545 jmp do_exit
558 .previous 546 .previous
559 547
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
new file mode 100644
index 000000000000..2bcebdc3eedb
--- /dev/null
+++ b/arch/x86_64/kernel/functionlist
@@ -0,0 +1,1286 @@
1*(.text.flush_thread)
2*(.text.check_poison_obj)
3*(.text.copy_page)
4*(.text.__set_personality)
5*(.text.gart_map_sg)
6*(.text.kmem_cache_free)
7*(.text.find_get_page)
8*(.text._raw_spin_lock)
9*(.text.ide_outb)
10*(.text.unmap_vmas)
11*(.text.copy_page_range)
12*(.text.kprobe_handler)
13*(.text.__handle_mm_fault)
14*(.text.__d_lookup)
15*(.text.copy_user_generic)
16*(.text.__link_path_walk)
17*(.text.get_page_from_freelist)
18*(.text.kmem_cache_alloc)
19*(.text.drive_cmd_intr)
20*(.text.ia32_setup_sigcontext)
21*(.text.huge_pte_offset)
22*(.text.do_page_fault)
23*(.text.page_remove_rmap)
24*(.text.release_pages)
25*(.text.ide_end_request)
26*(.text.__mutex_lock_slowpath)
27*(.text.__find_get_block)
28*(.text.kfree)
29*(.text.vfs_read)
30*(.text._raw_spin_unlock)
31*(.text.free_hot_cold_page)
32*(.text.fget_light)
33*(.text.schedule)
34*(.text.memcmp)
35*(.text.touch_atime)
36*(.text.__might_sleep)
37*(.text.__down_read_trylock)
38*(.text.arch_pick_mmap_layout)
39*(.text.find_vma)
40*(.text.__make_request)
41*(.text.do_generic_mapping_read)
42*(.text.mutex_lock_interruptible)
43*(.text.__generic_file_aio_read)
44*(.text._atomic_dec_and_lock)
45*(.text.__wake_up_bit)
46*(.text.add_to_page_cache)
47*(.text.cache_alloc_debugcheck_after)
48*(.text.vm_normal_page)
49*(.text.mutex_debug_check_no_locks_freed)
50*(.text.net_rx_action)
51*(.text.__find_first_zero_bit)
52*(.text.put_page)
53*(.text._raw_read_lock)
54*(.text.__delay)
55*(.text.dnotify_parent)
56*(.text.do_path_lookup)
57*(.text.do_sync_read)
58*(.text.do_lookup)
59*(.text.bit_waitqueue)
60*(.text.file_read_actor)
61*(.text.strncpy_from_user)
62*(.text.__pagevec_lru_add_active)
63*(.text.fget)
64*(.text.dput)
65*(.text.__strnlen_user)
66*(.text.inotify_inode_queue_event)
67*(.text.rw_verify_area)
68*(.text.ide_intr)
69*(.text.inotify_dentry_parent_queue_event)
70*(.text.permission)
71*(.text.memscan)
72*(.text.hpet_rtc_interrupt)
73*(.text.do_mmap_pgoff)
74*(.text.current_fs_time)
75*(.text.vfs_getattr)
76*(.text.kmem_flagcheck)
77*(.text.mark_page_accessed)
78*(.text.free_pages_and_swap_cache)
79*(.text.generic_fillattr)
80*(.text.__block_prepare_write)
81*(.text.__set_page_dirty_nobuffers)
82*(.text.link_path_walk)
83*(.text.find_get_pages_tag)
84*(.text.ide_do_request)
85*(.text.__alloc_pages)
86*(.text.generic_permission)
87*(.text.mod_page_state_offset)
88*(.text.free_pgd_range)
89*(.text.generic_file_buffered_write)
90*(.text.number)
91*(.text.ide_do_rw_disk)
92*(.text.__brelse)
93*(.text.__mod_page_state_offset)
94*(.text.rotate_reclaimable_page)
95*(.text.find_vma_prepare)
96*(.text.find_vma_prev)
97*(.text.lru_cache_add_active)
98*(.text.__kmalloc_track_caller)
99*(.text.smp_invalidate_interrupt)
100*(.text.handle_IRQ_event)
101*(.text.__find_get_block_slow)
102*(.text.do_wp_page)
103*(.text.do_select)
104*(.text.set_user_nice)
105*(.text.sys_read)
106*(.text.do_munmap)
107*(.text.csum_partial)
108*(.text.__do_softirq)
109*(.text.may_open)
110*(.text.getname)
111*(.text.get_empty_filp)
112*(.text.__fput)
113*(.text.remove_mapping)
114*(.text.filp_ctor)
115*(.text.poison_obj)
116*(.text.unmap_region)
117*(.text.test_set_page_writeback)
118*(.text.__do_page_cache_readahead)
119*(.text.sock_def_readable)
120*(.text.ide_outl)
121*(.text.shrink_zone)
122*(.text.rb_insert_color)
123*(.text.get_request)
124*(.text.sys_pread64)
125*(.text.spin_bug)
126*(.text.ide_outsl)
127*(.text.mask_and_ack_8259A)
128*(.text.filemap_nopage)
129*(.text.page_add_file_rmap)
130*(.text.find_lock_page)
131*(.text.tcp_poll)
132*(.text.__mark_inode_dirty)
133*(.text.file_ra_state_init)
134*(.text.generic_file_llseek)
135*(.text.__pagevec_lru_add)
136*(.text.page_cache_readahead)
137*(.text.n_tty_receive_buf)
138*(.text.zonelist_policy)
139*(.text.vma_adjust)
140*(.text.test_clear_page_dirty)
141*(.text.sync_buffer)
142*(.text.do_exit)
143*(.text.__bitmap_weight)
144*(.text.alloc_pages_current)
145*(.text.get_unused_fd)
146*(.text.zone_watermark_ok)
147*(.text.cpuset_update_task_memory_state)
148*(.text.__bitmap_empty)
149*(.text.sys_munmap)
150*(.text.__inode_dir_notify)
151*(.text.__generic_file_aio_write_nolock)
152*(.text.__pte_alloc)
153*(.text.sys_select)
154*(.text.vm_acct_memory)
155*(.text.vfs_write)
156*(.text.__lru_add_drain)
157*(.text.prio_tree_insert)
158*(.text.generic_file_aio_read)
159*(.text.vma_merge)
160*(.text.block_write_full_page)
161*(.text.__page_set_anon_rmap)
162*(.text.apic_timer_interrupt)
163*(.text.release_console_sem)
164*(.text.sys_write)
165*(.text.sys_brk)
166*(.text.dup_mm)
167*(.text.read_current_timer)
168*(.text.ll_rw_block)
169*(.text.blk_rq_map_sg)
170*(.text.dbg_userword)
171*(.text.__block_commit_write)
172*(.text.cache_grow)
173*(.text.copy_strings)
174*(.text.release_task)
175*(.text.do_sync_write)
176*(.text.unlock_page)
177*(.text.load_elf_binary)
178*(.text.__follow_mount)
179*(.text.__getblk)
180*(.text.do_sys_open)
181*(.text.current_kernel_time)
182*(.text.call_rcu)
183*(.text.write_chan)
184*(.text.vsnprintf)
185*(.text.dummy_inode_setsecurity)
186*(.text.submit_bh)
187*(.text.poll_freewait)
188*(.text.bio_alloc_bioset)
189*(.text.skb_clone)
190*(.text.page_waitqueue)
191*(.text.__mutex_lock_interruptible_slowpath)
192*(.text.get_index)
193*(.text.csum_partial_copy_generic)
194*(.text.bad_range)
195*(.text.remove_vma)
196*(.text.cp_new_stat)
197*(.text.alloc_arraycache)
198*(.text.test_clear_page_writeback)
199*(.text.strsep)
200*(.text.open_namei)
201*(.text._raw_read_unlock)
202*(.text.get_vma_policy)
203*(.text.__down_write_trylock)
204*(.text.find_get_pages)
205*(.text.tcp_rcv_established)
206*(.text.generic_make_request)
207*(.text.__block_write_full_page)
208*(.text.cfq_set_request)
209*(.text.sys_inotify_init)
210*(.text.split_vma)
211*(.text.__mod_timer)
212*(.text.get_options)
213*(.text.vma_link)
214*(.text.mpage_writepages)
215*(.text.truncate_complete_page)
216*(.text.tcp_recvmsg)
217*(.text.sigprocmask)
218*(.text.filemap_populate)
219*(.text.sys_close)
220*(.text.inotify_dev_queue_event)
221*(.text.do_task_stat)
222*(.text.__dentry_open)
223*(.text.unlink_file_vma)
224*(.text.__pollwait)
225*(.text.packet_rcv_spkt)
226*(.text.drop_buffers)
227*(.text.free_pgtables)
228*(.text.generic_file_direct_write)
229*(.text.copy_process)
230*(.text.netif_receive_skb)
231*(.text.dnotify_flush)
232*(.text.print_bad_pte)
233*(.text.anon_vma_unlink)
234*(.text.sys_mprotect)
235*(.text.sync_sb_inodes)
236*(.text.find_inode_fast)
237*(.text.dummy_inode_readlink)
238*(.text.putname)
239*(.text.init_smp_flush)
240*(.text.dbg_redzone2)
241*(.text.sk_run_filter)
242*(.text.may_expand_vm)
243*(.text.generic_file_aio_write)
244*(.text.find_next_zero_bit)
245*(.text.file_kill)
246*(.text.audit_getname)
247*(.text.arch_unmap_area_topdown)
248*(.text.alloc_page_vma)
249*(.text.tcp_transmit_skb)
250*(.text.rb_next)
251*(.text.dbg_redzone1)
252*(.text.generic_file_mmap)
253*(.text.vfs_fstat)
254*(.text.sys_time)
255*(.text.page_lock_anon_vma)
256*(.text.get_unmapped_area)
257*(.text.remote_llseek)
258*(.text.__up_read)
259*(.text.fd_install)
260*(.text.eventpoll_init_file)
261*(.text.dma_alloc_coherent)
262*(.text.create_empty_buffers)
263*(.text.__mutex_unlock_slowpath)
264*(.text.dup_fd)
265*(.text.d_alloc)
266*(.text.tty_ldisc_try)
267*(.text.sys_stime)
268*(.text.__rb_rotate_right)
269*(.text.d_validate)
270*(.text.rb_erase)
271*(.text.path_release)
272*(.text.memmove)
273*(.text.invalidate_complete_page)
274*(.text.clear_inode)
275*(.text.cache_estimate)
276*(.text.alloc_buffer_head)
277*(.text.smp_call_function_interrupt)
278*(.text.flush_tlb_others)
279*(.text.file_move)
280*(.text.balance_dirty_pages_ratelimited)
281*(.text.vma_prio_tree_add)
282*(.text.timespec_trunc)
283*(.text.mempool_alloc)
284*(.text.iget_locked)
285*(.text.d_alloc_root)
286*(.text.cpuset_populate_dir)
287*(.text.anon_vma_prepare)
288*(.text.sys_newstat)
289*(.text.alloc_page_interleave)
290*(.text.__path_lookup_intent_open)
291*(.text.__pagevec_free)
292*(.text.inode_init_once)
293*(.text.free_vfsmnt)
294*(.text.__user_walk_fd)
295*(.text.cfq_idle_slice_timer)
296*(.text.sys_mmap)
297*(.text.sys_llseek)
298*(.text.prio_tree_remove)
299*(.text.filp_close)
300*(.text.file_permission)
301*(.text.vma_prio_tree_remove)
302*(.text.tcp_ack)
303*(.text.nameidata_to_filp)
304*(.text.sys_lseek)
305*(.text.percpu_counter_mod)
306*(.text.igrab)
307*(.text.__bread)
308*(.text.alloc_inode)
309*(.text.filldir)
310*(.text.__rb_rotate_left)
311*(.text.irq_affinity_write_proc)
312*(.text.init_request_from_bio)
313*(.text.find_or_create_page)
314*(.text.tty_poll)
315*(.text.tcp_sendmsg)
316*(.text.ide_wait_stat)
317*(.text.free_buffer_head)
318*(.text.flush_signal_handlers)
319*(.text.tcp_v4_rcv)
320*(.text.nr_blockdev_pages)
321*(.text.locks_remove_flock)
322*(.text.__iowrite32_copy)
323*(.text.do_filp_open)
324*(.text.try_to_release_page)
325*(.text.page_add_new_anon_rmap)
326*(.text.kmem_cache_size)
327*(.text.eth_type_trans)
328*(.text.try_to_free_buffers)
329*(.text.schedule_tail)
330*(.text.proc_lookup)
331*(.text.no_llseek)
332*(.text.kfree_skbmem)
333*(.text.do_wait)
334*(.text.do_mpage_readpage)
335*(.text.vfs_stat_fd)
336*(.text.tty_write)
337*(.text.705)
338*(.text.sync_page)
339*(.text.__remove_shared_vm_struct)
340*(.text.__kfree_skb)
341*(.text.sock_poll)
342*(.text.get_request_wait)
343*(.text.do_sigaction)
344*(.text.do_brk)
345*(.text.tcp_event_data_recv)
346*(.text.read_chan)
347*(.text.pipe_writev)
348*(.text.__emul_lookup_dentry)
349*(.text.rtc_get_rtc_time)
350*(.text.print_objinfo)
351*(.text.file_update_time)
352*(.text.do_signal)
353*(.text.disable_8259A_irq)
354*(.text.blk_queue_bounce)
355*(.text.__anon_vma_link)
356*(.text.__vma_link)
357*(.text.vfs_rename)
358*(.text.sys_newlstat)
359*(.text.sys_newfstat)
360*(.text.sys_mknod)
361*(.text.__show_regs)
362*(.text.iput)
363*(.text.get_signal_to_deliver)
364*(.text.flush_tlb_page)
365*(.text.debug_mutex_wake_waiter)
366*(.text.copy_thread)
367*(.text.clear_page_dirty_for_io)
368*(.text.buffer_io_error)
369*(.text.vfs_permission)
370*(.text.truncate_inode_pages_range)
371*(.text.sys_recvfrom)
372*(.text.remove_suid)
373*(.text.mark_buffer_dirty)
374*(.text.local_bh_enable)
375*(.text.get_zeroed_page)
376*(.text.get_vmalloc_info)
377*(.text.flush_old_exec)
378*(.text.dummy_inode_permission)
379*(.text.__bio_add_page)
380*(.text.prio_tree_replace)
381*(.text.notify_change)
382*(.text.mntput_no_expire)
383*(.text.fput)
384*(.text.__end_that_request_first)
385*(.text.wake_up_bit)
386*(.text.unuse_mm)
387*(.text.skb_release_data)
388*(.text.shrink_icache_memory)
389*(.text.sched_balance_self)
390*(.text.__pmd_alloc)
391*(.text.pipe_poll)
392*(.text.normal_poll)
393*(.text.__free_pages)
394*(.text.follow_mount)
395*(.text.cdrom_start_packet_command)
396*(.text.blk_recount_segments)
397*(.text.bio_put)
398*(.text.__alloc_skb)
399*(.text.__wake_up)
400*(.text.vm_stat_account)
401*(.text.sys_fcntl)
402*(.text.sys_fadvise64)
403*(.text._raw_write_unlock)
404*(.text.__pud_alloc)
405*(.text.alloc_page_buffers)
406*(.text.vfs_llseek)
407*(.text.sockfd_lookup)
408*(.text._raw_write_lock)
409*(.text.put_compound_page)
410*(.text.prune_dcache)
411*(.text.pipe_readv)
412*(.text.mempool_free)
413*(.text.make_ahead_window)
414*(.text.lru_add_drain)
415*(.text.constant_test_bit)
416*(.text.__clear_user)
417*(.text.arch_unmap_area)
418*(.text.anon_vma_link)
419*(.text.sys_chroot)
420*(.text.setup_arg_pages)
421*(.text.radix_tree_preload)
422*(.text.init_rwsem)
423*(.text.generic_osync_inode)
424*(.text.generic_delete_inode)
425*(.text.do_sys_poll)
426*(.text.dev_queue_xmit)
427*(.text.default_llseek)
428*(.text.__writeback_single_inode)
429*(.text.vfs_ioctl)
430*(.text.__up_write)
431*(.text.unix_poll)
432*(.text.sys_rt_sigprocmask)
433*(.text.sock_recvmsg)
434*(.text.recalc_bh_state)
435*(.text.__put_unused_fd)
436*(.text.process_backlog)
437*(.text.locks_remove_posix)
438*(.text.lease_modify)
439*(.text.expand_files)
440*(.text.end_buffer_read_nobh)
441*(.text.d_splice_alias)
442*(.text.debug_mutex_init_waiter)
443*(.text.copy_from_user)
444*(.text.cap_vm_enough_memory)
445*(.text.show_vfsmnt)
446*(.text.release_sock)
447*(.text.pfifo_fast_enqueue)
448*(.text.half_md4_transform)
449*(.text.fs_may_remount_ro)
450*(.text.do_fork)
451*(.text.copy_hugetlb_page_range)
452*(.text.cache_free_debugcheck)
453*(.text.__tcp_select_window)
454*(.text.task_handoff_register)
455*(.text.sys_open)
456*(.text.strlcpy)
457*(.text.skb_copy_datagram_iovec)
458*(.text.set_up_list3s)
459*(.text.release_open_intent)
460*(.text.qdisc_restart)
461*(.text.n_tty_chars_in_buffer)
462*(.text.inode_change_ok)
463*(.text.__downgrade_write)
464*(.text.debug_mutex_unlock)
465*(.text.add_timer_randomness)
466*(.text.sock_common_recvmsg)
467*(.text.set_bh_page)
468*(.text.printk_lock)
469*(.text.path_release_on_umount)
470*(.text.ip_output)
471*(.text.ide_build_dmatable)
472*(.text.__get_user_8)
473*(.text.end_buffer_read_sync)
474*(.text.__d_path)
475*(.text.d_move)
476*(.text.del_timer)
477*(.text.constant_test_bit)
478*(.text.blockable_page_cache_readahead)
479*(.text.tty_read)
480*(.text.sys_readlink)
481*(.text.sys_faccessat)
482*(.text.read_swap_cache_async)
483*(.text.pty_write_room)
484*(.text.page_address_in_vma)
485*(.text.kthread)
486*(.text.cfq_exit_io_context)
487*(.text.__tcp_push_pending_frames)
488*(.text.sys_pipe)
489*(.text.submit_bio)
490*(.text.pid_revalidate)
491*(.text.page_referenced_file)
492*(.text.lock_sock)
493*(.text.get_page_state_node)
494*(.text.generic_block_bmap)
495*(.text.do_setitimer)
496*(.text.dev_queue_xmit_nit)
497*(.text.copy_from_read_buf)
498*(.text.__const_udelay)
499*(.text.console_conditional_schedule)
500*(.text.wake_up_new_task)
501*(.text.wait_for_completion_interruptible)
502*(.text.tcp_rcv_rtt_update)
503*(.text.sys_mlockall)
504*(.text.set_fs_altroot)
505*(.text.schedule_timeout)
506*(.text.nr_free_pagecache_pages)
507*(.text.nf_iterate)
508*(.text.mapping_tagged)
509*(.text.ip_queue_xmit)
510*(.text.ip_local_deliver)
511*(.text.follow_page)
512*(.text.elf_map)
513*(.text.dummy_file_permission)
514*(.text.dispose_list)
515*(.text.dentry_open)
516*(.text.dentry_iput)
517*(.text.bio_alloc)
518*(.text.alloc_skb_from_cache)
519*(.text.wait_on_page_bit)
520*(.text.vfs_readdir)
521*(.text.vfs_lstat)
522*(.text.seq_escape)
523*(.text.__posix_lock_file)
524*(.text.mm_release)
525*(.text.kref_put)
526*(.text.ip_rcv)
527*(.text.__iget)
528*(.text.free_pages)
529*(.text.find_mergeable_anon_vma)
530*(.text.find_extend_vma)
531*(.text.dummy_inode_listsecurity)
532*(.text.bio_add_page)
533*(.text.__vm_enough_memory)
534*(.text.vfs_stat)
535*(.text.tty_paranoia_check)
536*(.text.tcp_read_sock)
537*(.text.tcp_data_queue)
538*(.text.sys_uname)
539*(.text.sys_renameat)
540*(.text.__strncpy_from_user)
541*(.text.__mutex_init)
542*(.text.__lookup_hash)
543*(.text.kref_get)
544*(.text.ip_route_input)
545*(.text.__insert_inode_hash)
546*(.text.do_sock_write)
547*(.text.blk_done_softirq)
548*(.text.__wake_up_sync)
549*(.text.__vma_link_rb)
550*(.text.tty_ioctl)
551*(.text.tracesys)
552*(.text.sys_getdents)
553*(.text.sys_dup)
554*(.text.stub_execve)
555*(.text.sha_transform)
556*(.text.radix_tree_tag_clear)
557*(.text.put_unused_fd)
558*(.text.put_files_struct)
559*(.text.mpage_readpages)
560*(.text.may_delete)
561*(.text.kmem_cache_create)
562*(.text.ip_mc_output)
563*(.text.interleave_nodes)
564*(.text.groups_search)
565*(.text.generic_drop_inode)
566*(.text.generic_commit_write)
567*(.text.fcntl_setlk)
568*(.text.exit_mmap)
569*(.text.end_page_writeback)
570*(.text.__d_rehash)
571*(.text.debug_mutex_free_waiter)
572*(.text.csum_ipv6_magic)
573*(.text.count)
574*(.text.cleanup_rbuf)
575*(.text.check_spinlock_acquired_node)
576*(.text.can_vma_merge_after)
577*(.text.bio_endio)
578*(.text.alloc_pidmap)
579*(.text.write_ldt)
580*(.text.vmtruncate_range)
581*(.text.vfs_create)
582*(.text.__user_walk)
583*(.text.update_send_head)
584*(.text.unmap_underlying_metadata)
585*(.text.tty_ldisc_deref)
586*(.text.tcp_setsockopt)
587*(.text.tcp_send_ack)
588*(.text.sys_pause)
589*(.text.sys_gettimeofday)
590*(.text.sync_dirty_buffer)
591*(.text.strncmp)
592*(.text.release_posix_timer)
593*(.text.proc_file_read)
594*(.text.prepare_to_wait)
595*(.text.locks_mandatory_locked)
596*(.text.interruptible_sleep_on_timeout)
597*(.text.inode_sub_bytes)
598*(.text.in_group_p)
599*(.text.hrtimer_try_to_cancel)
600*(.text.filldir64)
601*(.text.fasync_helper)
602*(.text.dummy_sb_pivotroot)
603*(.text.d_lookup)
604*(.text.d_instantiate)
605*(.text.__d_find_alias)
606*(.text.cpu_idle_wait)
607*(.text.cond_resched_lock)
608*(.text.chown_common)
609*(.text.blk_congestion_wait)
610*(.text.activate_page)
611*(.text.unlock_buffer)
612*(.text.tty_wakeup)
613*(.text.tcp_v4_do_rcv)
614*(.text.tcp_current_mss)
615*(.text.sys_openat)
616*(.text.sys_fchdir)
617*(.text.strnlen_user)
618*(.text.strnlen)
619*(.text.strchr)
620*(.text.sock_common_getsockopt)
621*(.text.skb_checksum)
622*(.text.remove_wait_queue)
623*(.text.rb_replace_node)
624*(.text.radix_tree_node_ctor)
625*(.text.pty_chars_in_buffer)
626*(.text.profile_hit)
627*(.text.prio_tree_left)
628*(.text.pgd_clear_bad)
629*(.text.pfifo_fast_dequeue)
630*(.text.page_referenced)
631*(.text.open_exec)
632*(.text.mmput)
633*(.text.mm_init)
634*(.text.__ide_dma_off_quietly)
635*(.text.ide_dma_intr)
636*(.text.hrtimer_start)
637*(.text.get_io_context)
638*(.text.__get_free_pages)
639*(.text.find_first_zero_bit)
640*(.text.file_free_rcu)
641*(.text.dummy_socket_sendmsg)
642*(.text.do_unlinkat)
643*(.text.do_arch_prctl)
644*(.text.destroy_inode)
645*(.text.can_vma_merge_before)
646*(.text.block_sync_page)
647*(.text.block_prepare_write)
648*(.text.bio_init)
649*(.text.arch_ptrace)
650*(.text.wake_up_inode)
651*(.text.wait_on_retry_sync_kiocb)
652*(.text.vma_prio_tree_next)
653*(.text.tcp_rcv_space_adjust)
654*(.text.__tcp_ack_snd_check)
655*(.text.sys_utime)
656*(.text.sys_recvmsg)
657*(.text.sys_mremap)
658*(.text.sys_bdflush)
659*(.text.sleep_on)
660*(.text.set_page_dirty_lock)
661*(.text.seq_path)
662*(.text.schedule_timeout_interruptible)
663*(.text.sched_fork)
664*(.text.rt_run_flush)
665*(.text.profile_munmap)
666*(.text.prepare_binprm)
667*(.text.__pagevec_release_nonlru)
668*(.text.m_show)
669*(.text.lookup_mnt)
670*(.text.__lookup_mnt)
671*(.text.lock_timer_base)
672*(.text.is_subdir)
673*(.text.invalidate_bh_lru)
674*(.text.init_buffer_head)
675*(.text.ifind_fast)
676*(.text.ide_dma_start)
677*(.text.__get_page_state)
678*(.text.flock_to_posix_lock)
679*(.text.__find_symbol)
680*(.text.do_futex)
681*(.text.do_execve)
682*(.text.dirty_writeback_centisecs_handler)
683*(.text.dev_watchdog)
684*(.text.can_share_swap_page)
685*(.text.blkdev_put)
686*(.text.bio_get_nr_vecs)
687*(.text.xfrm_compile_policy)
688*(.text.vma_prio_tree_insert)
689*(.text.vfs_lstat_fd)
690*(.text.__user_path_lookup_open)
691*(.text.thread_return)
692*(.text.tcp_send_delayed_ack)
693*(.text.sock_def_error_report)
694*(.text.shrink_slab)
695*(.text.serial_out)
696*(.text.seq_read)
697*(.text.secure_ip_id)
698*(.text.search_binary_handler)
699*(.text.proc_pid_unhash)
700*(.text.pagevec_lookup)
701*(.text.new_inode)
702*(.text.memcpy_toiovec)
703*(.text.locks_free_lock)
704*(.text.__lock_page)
705*(.text.__lock_buffer)
706*(.text.load_module)
707*(.text.is_bad_inode)
708*(.text.invalidate_inode_buffers)
709*(.text.insert_vm_struct)
710*(.text.inode_setattr)
711*(.text.inode_add_bytes)
712*(.text.ide_read_24)
713*(.text.ide_get_error_location)
714*(.text.ide_do_drive_cmd)
715*(.text.get_locked_pte)
716*(.text.get_filesystem_list)
717*(.text.generic_file_open)
718*(.text.follow_down)
719*(.text.find_next_bit)
720*(.text.__find_first_bit)
721*(.text.exit_mm)
722*(.text.exec_keys)
723*(.text.end_buffer_write_sync)
724*(.text.end_bio_bh_io_sync)
725*(.text.dummy_socket_shutdown)
726*(.text.d_rehash)
727*(.text.d_path)
728*(.text.do_ioctl)
729*(.text.dget_locked)
730*(.text.copy_thread_group_keys)
731*(.text.cdrom_end_request)
732*(.text.cap_bprm_apply_creds)
733*(.text.blk_rq_bio_prep)
734*(.text.__bitmap_intersects)
735*(.text.bio_phys_segments)
736*(.text.bio_free)
737*(.text.arch_get_unmapped_area_topdown)
738*(.text.writeback_in_progress)
739*(.text.vfs_follow_link)
740*(.text.tcp_rcv_state_process)
741*(.text.tcp_check_space)
742*(.text.sys_stat)
743*(.text.sys_rt_sigreturn)
744*(.text.sys_rt_sigaction)
745*(.text.sys_remap_file_pages)
746*(.text.sys_pwrite64)
747*(.text.sys_fchownat)
748*(.text.sys_fchmodat)
749*(.text.strncat)
750*(.text.strlcat)
751*(.text.strcmp)
752*(.text.steal_locks)
753*(.text.sock_create)
754*(.text.sk_stream_rfree)
755*(.text.sk_stream_mem_schedule)
756*(.text.skip_atoi)
757*(.text.sk_alloc)
758*(.text.show_stat)
759*(.text.set_fs_pwd)
760*(.text.set_binfmt)
761*(.text.pty_unthrottle)
762*(.text.proc_symlink)
763*(.text.pipe_release)
764*(.text.pageout)
765*(.text.n_tty_write_wakeup)
766*(.text.n_tty_ioctl)
767*(.text.nr_free_zone_pages)
768*(.text.migration_thread)
769*(.text.mempool_free_slab)
770*(.text.meminfo_read_proc)
771*(.text.max_sane_readahead)
772*(.text.lru_cache_add)
773*(.text.kill_fasync)
774*(.text.kernel_read)
775*(.text.invalidate_mapping_pages)
776*(.text.inode_has_buffers)
777*(.text.init_once)
778*(.text.inet_sendmsg)
779*(.text.idedisk_issue_flush)
780*(.text.generic_file_write)
781*(.text.free_more_memory)
782*(.text.__free_fdtable)
783*(.text.filp_dtor)
784*(.text.exit_sem)
785*(.text.exit_itimers)
786*(.text.error_interrupt)
787*(.text.end_buffer_async_write)
788*(.text.eligible_child)
789*(.text.elf_map)
790*(.text.dump_task_regs)
791*(.text.dummy_task_setscheduler)
792*(.text.dummy_socket_accept)
793*(.text.dummy_file_free_security)
794*(.text.__down_read)
795*(.text.do_sock_read)
796*(.text.do_sigaltstack)
797*(.text.do_mremap)
798*(.text.current_io_context)
799*(.text.cpu_swap_callback)
800*(.text.copy_vma)
801*(.text.cap_bprm_set_security)
802*(.text.blk_insert_request)
803*(.text.bio_map_kern_endio)
804*(.text.bio_hw_segments)
805*(.text.bictcp_cong_avoid)
806*(.text.add_interrupt_randomness)
807*(.text.wait_for_completion)
808*(.text.version_read_proc)
809*(.text.unix_write_space)
810*(.text.tty_ldisc_ref_wait)
811*(.text.tty_ldisc_put)
812*(.text.try_to_wake_up)
813*(.text.tcp_v4_tw_remember_stamp)
814*(.text.tcp_try_undo_dsack)
815*(.text.tcp_may_send_now)
816*(.text.sys_waitid)
817*(.text.sys_sched_getparam)
818*(.text.sys_getppid)
819*(.text.sys_getcwd)
820*(.text.sys_dup2)
821*(.text.sys_chmod)
822*(.text.sys_chdir)
823*(.text.sprintf)
824*(.text.sock_wfree)
825*(.text.sock_aio_write)
826*(.text.skb_drop_fraglist)
827*(.text.skb_dequeue)
828*(.text.set_close_on_exec)
829*(.text.set_brk)
830*(.text.seq_puts)
831*(.text.SELECT_DRIVE)
832*(.text.sched_exec)
833*(.text.return_EIO)
834*(.text.remove_from_page_cache)
835*(.text.rcu_start_batch)
836*(.text.__put_task_struct)
837*(.text.proc_pid_readdir)
838*(.text.proc_get_inode)
839*(.text.prepare_to_wait_exclusive)
840*(.text.pipe_wait)
841*(.text.pipe_new)
842*(.text.pdflush_operation)
843*(.text.__pagevec_release)
844*(.text.pagevec_lookup_tag)
845*(.text.packet_rcv)
846*(.text.n_tty_set_room)
847*(.text.nr_free_pages)
848*(.text.__net_timestamp)
849*(.text.mpage_end_io_read)
850*(.text.mod_timer)
851*(.text.__memcpy)
852*(.text.mb_cache_shrink_fn)
853*(.text.lock_rename)
854*(.text.kstrdup)
855*(.text.is_ignored)
856*(.text.int_very_careful)
857*(.text.inotify_inode_is_dead)
858*(.text.inotify_get_cookie)
859*(.text.inode_get_bytes)
860*(.text.init_timer)
861*(.text.init_dev)
862*(.text.inet_getname)
863*(.text.ide_map_sg)
864*(.text.__ide_dma_end)
865*(.text.hrtimer_get_remaining)
866*(.text.get_task_mm)
867*(.text.get_random_int)
868*(.text.free_pipe_info)
869*(.text.filemap_write_and_wait_range)
870*(.text.exit_thread)
871*(.text.enter_idle)
872*(.text.end_that_request_first)
873*(.text.end_8259A_irq)
874*(.text.dummy_file_alloc_security)
875*(.text.do_group_exit)
876*(.text.debug_mutex_init)
877*(.text.cpuset_exit)
878*(.text.cpu_idle)
879*(.text.copy_semundo)
880*(.text.copy_files)
881*(.text.chrdev_open)
882*(.text.cdrom_transfer_packet_command)
883*(.text.cdrom_mode_sense)
884*(.text.blk_phys_contig_segment)
885*(.text.blk_get_queue)
886*(.text.bio_split)
887*(.text.audit_alloc)
888*(.text.anon_pipe_buf_release)
889*(.text.add_wait_queue_exclusive)
890*(.text.add_wait_queue)
891*(.text.acct_process)
892*(.text.account)
893*(.text.zeromap_page_range)
894*(.text.yield)
895*(.text.writeback_acquire)
896*(.text.worker_thread)
897*(.text.wait_on_page_writeback_range)
898*(.text.__wait_on_buffer)
899*(.text.vscnprintf)
900*(.text.vmalloc_to_pfn)
901*(.text.vgacon_save_screen)
902*(.text.vfs_unlink)
903*(.text.vfs_rmdir)
904*(.text.unregister_md_personality)
905*(.text.unlock_new_inode)
906*(.text.unix_stream_sendmsg)
907*(.text.unix_stream_recvmsg)
908*(.text.unhash_process)
909*(.text.udp_v4_lookup_longway)
910*(.text.tty_ldisc_flush)
911*(.text.tty_ldisc_enable)
912*(.text.tty_hung_up_p)
913*(.text.tty_buffer_free_all)
914*(.text.tso_fragment)
915*(.text.try_to_del_timer_sync)
916*(.text.tcp_v4_err)
917*(.text.tcp_unhash)
918*(.text.tcp_seq_next)
919*(.text.tcp_select_initial_window)
920*(.text.tcp_sacktag_write_queue)
921*(.text.tcp_cwnd_validate)
922*(.text.sys_vhangup)
923*(.text.sys_uselib)
924*(.text.sys_symlink)
925*(.text.sys_signal)
926*(.text.sys_poll)
927*(.text.sys_mount)
928*(.text.sys_kill)
929*(.text.sys_ioctl)
930*(.text.sys_inotify_add_watch)
931*(.text.sys_getuid)
932*(.text.sys_getrlimit)
933*(.text.sys_getitimer)
934*(.text.sys_getgroups)
935*(.text.sys_ftruncate)
936*(.text.sysfs_lookup)
937*(.text.sys_exit_group)
938*(.text.stub_fork)
939*(.text.sscanf)
940*(.text.sock_map_fd)
941*(.text.sock_get_timestamp)
942*(.text.__sock_create)
943*(.text.smp_call_function_single)
944*(.text.sk_stop_timer)
945*(.text.skb_copy_and_csum_datagram)
946*(.text.__skb_checksum_complete)
947*(.text.single_next)
948*(.text.sigqueue_alloc)
949*(.text.shrink_dcache_parent)
950*(.text.select_idle_routine)
951*(.text.run_workqueue)
952*(.text.run_local_timers)
953*(.text.remove_inode_hash)
954*(.text.remove_dquot_ref)
955*(.text.register_binfmt)
956*(.text.read_cache_pages)
957*(.text.rb_last)
958*(.text.pty_open)
959*(.text.proc_root_readdir)
960*(.text.proc_pid_flush)
961*(.text.proc_pident_lookup)
962*(.text.proc_fill_super)
963*(.text.proc_exe_link)
964*(.text.posix_locks_deadlock)
965*(.text.pipe_iov_copy_from_user)
966*(.text.opost)
967*(.text.nf_register_hook)
968*(.text.netif_rx_ni)
969*(.text.m_start)
970*(.text.mpage_writepage)
971*(.text.mm_alloc)
972*(.text.memory_open)
973*(.text.mark_buffer_async_write)
974*(.text.lru_add_drain_all)
975*(.text.locks_init_lock)
976*(.text.locks_delete_lock)
977*(.text.lock_hrtimer_base)
978*(.text.load_script)
979*(.text.__kill_fasync)
980*(.text.ip_mc_sf_allow)
981*(.text.__ioremap)
982*(.text.int_with_check)
983*(.text.int_sqrt)
984*(.text.install_thread_keyring)
985*(.text.init_page_buffers)
986*(.text.inet_sock_destruct)
987*(.text.idle_notifier_register)
988*(.text.ide_execute_command)
989*(.text.ide_end_drive_cmd)
990*(.text.__ide_dma_host_on)
991*(.text.hrtimer_run_queues)
992*(.text.hpet_mask_rtc_irq_bit)
993*(.text.__get_zone_counts)
994*(.text.get_zone_counts)
995*(.text.get_write_access)
996*(.text.get_fs_struct)
997*(.text.get_dirty_limits)
998*(.text.generic_readlink)
999*(.text.free_hot_page)
1000*(.text.finish_wait)
1001*(.text.find_inode)
1002*(.text.find_first_bit)
1003*(.text.__filemap_fdatawrite_range)
1004*(.text.__filemap_copy_from_user_iovec)
1005*(.text.exit_aio)
1006*(.text.elv_set_request)
1007*(.text.elv_former_request)
1008*(.text.dup_namespace)
1009*(.text.dupfd)
1010*(.text.dummy_socket_getsockopt)
1011*(.text.dummy_sb_post_mountroot)
1012*(.text.dummy_quotactl)
1013*(.text.dummy_inode_rename)
1014*(.text.__do_SAK)
1015*(.text.do_pipe)
1016*(.text.do_fsync)
1017*(.text.d_instantiate_unique)
1018*(.text.d_find_alias)
1019*(.text.deny_write_access)
1020*(.text.dentry_unhash)
1021*(.text.d_delete)
1022*(.text.datagram_poll)
1023*(.text.cpuset_fork)
1024*(.text.cpuid_read)
1025*(.text.copy_namespace)
1026*(.text.cond_resched)
1027*(.text.check_version)
1028*(.text.__change_page_attr)
1029*(.text.cfq_slab_kill)
1030*(.text.cfq_completed_request)
1031*(.text.cdrom_pc_intr)
1032*(.text.cdrom_decode_status)
1033*(.text.cap_capset_check)
1034*(.text.blk_put_request)
1035*(.text.bio_fs_destructor)
1036*(.text.bictcp_min_cwnd)
1037*(.text.alloc_chrdev_region)
1038*(.text.add_element)
1039*(.text.acct_update_integrals)
1040*(.text.write_boundary_block)
1041*(.text.writeback_release)
1042*(.text.writeback_inodes)
1043*(.text.wake_up_state)
1044*(.text.__wake_up_locked)
1045*(.text.wake_futex)
1046*(.text.wait_task_inactive)
1047*(.text.__wait_on_freeing_inode)
1048*(.text.wait_noreap_copyout)
1049*(.text.vmstat_start)
1050*(.text.vgacon_do_font_op)
1051*(.text.vfs_readv)
1052*(.text.vfs_quota_sync)
1053*(.text.update_queue)
1054*(.text.unshare_files)
1055*(.text.unmap_vm_area)
1056*(.text.unix_socketpair)
1057*(.text.unix_release_sock)
1058*(.text.unix_detach_fds)
1059*(.text.unix_create1)
1060*(.text.unix_bind)
1061*(.text.udp_sendmsg)
1062*(.text.udp_rcv)
1063*(.text.udp_queue_rcv_skb)
1064*(.text.uart_write)
1065*(.text.uart_startup)
1066*(.text.uart_open)
1067*(.text.tty_vhangup)
1068*(.text.tty_termios_baud_rate)
1069*(.text.tty_release)
1070*(.text.tty_ldisc_ref)
1071*(.text.throttle_vm_writeout)
1072*(.text.058)
1073*(.text.tcp_xmit_probe_skb)
1074*(.text.tcp_v4_send_check)
1075*(.text.tcp_v4_destroy_sock)
1076*(.text.tcp_sync_mss)
1077*(.text.tcp_snd_test)
1078*(.text.tcp_slow_start)
1079*(.text.tcp_send_fin)
1080*(.text.tcp_rtt_estimator)
1081*(.text.tcp_parse_options)
1082*(.text.tcp_ioctl)
1083*(.text.tcp_init_tso_segs)
1084*(.text.tcp_init_cwnd)
1085*(.text.tcp_getsockopt)
1086*(.text.tcp_fin)
1087*(.text.tcp_connect)
1088*(.text.tcp_cong_avoid)
1089*(.text.__tcp_checksum_complete_user)
1090*(.text.task_dumpable)
1091*(.text.sys_wait4)
1092*(.text.sys_utimes)
1093*(.text.sys_symlinkat)
1094*(.text.sys_socketpair)
1095*(.text.sys_rmdir)
1096*(.text.sys_readahead)
1097*(.text.sys_nanosleep)
1098*(.text.sys_linkat)
1099*(.text.sys_fstat)
1100*(.text.sysfs_readdir)
1101*(.text.sys_execve)
1102*(.text.sysenter_tracesys)
1103*(.text.sys_chown)
1104*(.text.stub_clone)
1105*(.text.strrchr)
1106*(.text.strncpy)
1107*(.text.stopmachine_set_state)
1108*(.text.sock_sendmsg)
1109*(.text.sock_release)
1110*(.text.sock_fasync)
1111*(.text.sock_close)
1112*(.text.sk_stream_write_space)
1113*(.text.sk_reset_timer)
1114*(.text.skb_split)
1115*(.text.skb_recv_datagram)
1116*(.text.skb_queue_tail)
1117*(.text.sk_attach_filter)
1118*(.text.si_swapinfo)
1119*(.text.simple_strtoll)
1120*(.text.set_termios)
1121*(.text.set_task_comm)
1122*(.text.set_shrinker)
1123*(.text.set_normalized_timespec)
1124*(.text.set_brk)
1125*(.text.serial_in)
1126*(.text.seq_printf)
1127*(.text.secure_dccp_sequence_number)
1128*(.text.rwlock_bug)
1129*(.text.rt_hash_code)
1130*(.text.__rta_fill)
1131*(.text.__request_resource)
1132*(.text.relocate_new_kernel)
1133*(.text.release_thread)
1134*(.text.release_mem)
1135*(.text.rb_prev)
1136*(.text.rb_first)
1137*(.text.random_poll)
1138*(.text.__put_super_and_need_restart)
1139*(.text.pty_write)
1140*(.text.ptrace_stop)
1141*(.text.proc_self_readlink)
1142*(.text.proc_root_lookup)
1143*(.text.proc_root_link)
1144*(.text.proc_pid_make_inode)
1145*(.text.proc_pid_attr_write)
1146*(.text.proc_lookupfd)
1147*(.text.proc_delete_inode)
1148*(.text.posix_same_owner)
1149*(.text.posix_block_lock)
1150*(.text.poll_initwait)
1151*(.text.pipe_write)
1152*(.text.pipe_read_fasync)
1153*(.text.pipe_ioctl)
1154*(.text.pdflush)
1155*(.text.pci_user_read_config_dword)
1156*(.text.page_readlink)
1157*(.text.null_lseek)
1158*(.text.nf_hook_slow)
1159*(.text.netlink_sock_destruct)
1160*(.text.netlink_broadcast)
1161*(.text.neigh_resolve_output)
1162*(.text.name_to_int)
1163*(.text.mwait_idle)
1164*(.text.mutex_trylock)
1165*(.text.mutex_debug_check_no_locks_held)
1166*(.text.m_stop)
1167*(.text.mpage_end_io_write)
1168*(.text.mpage_alloc)
1169*(.text.move_page_tables)
1170*(.text.mounts_open)
1171*(.text.__memset)
1172*(.text.memcpy_fromiovec)
1173*(.text.make_8259A_irq)
1174*(.text.lookup_user_key_possessed)
1175*(.text.lookup_create)
1176*(.text.locks_insert_lock)
1177*(.text.locks_alloc_lock)
1178*(.text.kthread_should_stop)
1179*(.text.kswapd)
1180*(.text.kobject_uevent)
1181*(.text.kobject_get_path)
1182*(.text.kobject_get)
1183*(.text.klist_children_put)
1184*(.text.__ip_route_output_key)
1185*(.text.ip_flush_pending_frames)
1186*(.text.ip_compute_csum)
1187*(.text.ip_append_data)
1188*(.text.ioc_set_batching)
1189*(.text.invalidate_inode_pages)
1190*(.text.__invalidate_device)
1191*(.text.install_arg_page)
1192*(.text.in_sched_functions)
1193*(.text.inotify_unmount_inodes)
1194*(.text.init_once)
1195*(.text.init_cdrom_command)
1196*(.text.inet_stream_connect)
1197*(.text.inet_sk_rebuild_header)
1198*(.text.inet_csk_addr2sockaddr)
1199*(.text.inet_create)
1200*(.text.ifind)
1201*(.text.ide_setup_dma)
1202*(.text.ide_outsw)
1203*(.text.ide_fixstring)
1204*(.text.ide_dma_setup)
1205*(.text.ide_cdrom_packet)
1206*(.text.ide_cd_put)
1207*(.text.ide_build_sglist)
1208*(.text.i8259A_shutdown)
1209*(.text.hung_up_tty_ioctl)
1210*(.text.hrtimer_nanosleep)
1211*(.text.hrtimer_init)
1212*(.text.hrtimer_cancel)
1213*(.text.hash_futex)
1214*(.text.group_send_sig_info)
1215*(.text.grab_cache_page_nowait)
1216*(.text.get_wchan)
1217*(.text.get_stack)
1218*(.text.get_page_state)
1219*(.text.getnstimeofday)
1220*(.text.get_node)
1221*(.text.get_kprobe)
1222*(.text.generic_unplug_device)
1223*(.text.free_task)
1224*(.text.frag_show)
1225*(.text.find_next_zero_string)
1226*(.text.filp_open)
1227*(.text.fillonedir)
1228*(.text.exit_io_context)
1229*(.text.exit_idle)
1230*(.text.exact_lock)
1231*(.text.eth_header)
1232*(.text.dummy_unregister_security)
1233*(.text.dummy_socket_post_create)
1234*(.text.dummy_socket_listen)
1235*(.text.dummy_quota_on)
1236*(.text.dummy_inode_follow_link)
1237*(.text.dummy_file_receive)
1238*(.text.dummy_file_mprotect)
1239*(.text.dummy_file_lock)
1240*(.text.dummy_file_ioctl)
1241*(.text.dummy_bprm_post_apply_creds)
1242*(.text.do_writepages)
1243*(.text.__down_interruptible)
1244*(.text.do_notify_resume)
1245*(.text.do_acct_process)
1246*(.text.del_timer_sync)
1247*(.text.default_rebuild_header)
1248*(.text.d_callback)
1249*(.text.dcache_readdir)
1250*(.text.ctrl_dumpfamily)
1251*(.text.cpuset_rmdir)
1252*(.text.copy_strings_kernel)
1253*(.text.con_write_room)
1254*(.text.complete_all)
1255*(.text.collect_sigign_sigcatch)
1256*(.text.clear_user)
1257*(.text.check_unthrottle)
1258*(.text.cdrom_release)
1259*(.text.cdrom_newpc_intr)
1260*(.text.cdrom_ioctl)
1261*(.text.cdrom_check_status)
1262*(.text.cdev_put)
1263*(.text.cdev_add)
1264*(.text.cap_ptrace)
1265*(.text.cap_bprm_secureexec)
1266*(.text.cache_alloc_refill)
1267*(.text.bmap)
1268*(.text.blk_run_queue)
1269*(.text.blk_queue_dma_alignment)
1270*(.text.blk_ordered_req_seq)
1271*(.text.blk_backing_dev_unplug)
1272*(.text.__bitmap_subset)
1273*(.text.__bitmap_and)
1274*(.text.bio_unmap_user)
1275*(.text.__bforget)
1276*(.text.bd_forget)
1277*(.text.bad_pipe_w)
1278*(.text.bad_get_user)
1279*(.text.audit_free)
1280*(.text.anon_vma_ctor)
1281*(.text.anon_pipe_buf_map)
1282*(.text.alloc_sock_iocb)
1283*(.text.alloc_fdset)
1284*(.text.aio_kick_handler)
1285*(.text.__add_entropy_words)
1286*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 692c737feddb..6df05e6034fa 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -26,6 +26,7 @@
26 */ 26 */
27 27
28 .text 28 .text
29 .section .bootstrap.text
29 .code32 30 .code32
30 .globl startup_32 31 .globl startup_32
31/* %bx: 1 if coming from smp trampoline on secondary cpu */ 32/* %bx: 1 if coming from smp trampoline on secondary cpu */
@@ -192,7 +193,8 @@ startup_64:
192 movq initial_code(%rip),%rax 193 movq initial_code(%rip),%rax
193 jmp *%rax 194 jmp *%rax
194 195
195 /* SMP bootup changes these two */ 196 /* SMP bootup changes these two */
197 .align 8
196 .globl initial_code 198 .globl initial_code
197initial_code: 199initial_code:
198 .quad x86_64_start_kernel 200 .quad x86_64_start_kernel
@@ -213,6 +215,11 @@ ENTRY(early_idt_handler)
213 cmpl $2,early_recursion_flag(%rip) 215 cmpl $2,early_recursion_flag(%rip)
214 jz 1f 216 jz 1f
215 call dump_stack 217 call dump_stack
218#ifdef CONFIG_KALLSYMS
219 leaq early_idt_ripmsg(%rip),%rdi
220 movq 8(%rsp),%rsi # get rip again
221 call __print_symbol
222#endif
2161: hlt 2231: hlt
217 jmp 1b 224 jmp 1b
218early_recursion_flag: 225early_recursion_flag:
@@ -220,6 +227,8 @@ early_recursion_flag:
220 227
221early_idt_msg: 228early_idt_msg:
222 .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" 229 .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
230early_idt_ripmsg:
231 .asciz "RIP %s\n"
223 232
224.code32 233.code32
225ENTRY(no_long_mode) 234ENTRY(no_long_mode)
@@ -230,7 +239,7 @@ ENTRY(no_long_mode)
230.org 0xf00 239.org 0xf00
231 .globl pGDT32 240 .globl pGDT32
232pGDT32: 241pGDT32:
233 .word gdt_end-cpu_gdt_table 242 .word gdt_end-cpu_gdt_table-1
234 .long cpu_gdt_table-__START_KERNEL_map 243 .long cpu_gdt_table-__START_KERNEL_map
235 244
236.org 0xf10 245.org 0xf10
@@ -286,8 +295,6 @@ NEXT_PAGE(level2_kernel_pgt)
286 /* Module mapping starts here */ 295 /* Module mapping starts here */
287 .fill 492,8,0 296 .fill 492,8,0
288 297
289NEXT_PAGE(empty_zero_page)
290
291NEXT_PAGE(level3_physmem_pgt) 298NEXT_PAGE(level3_physmem_pgt)
292 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ 299 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
293 .fill 511,8,0 300 .fill 511,8,0
@@ -330,7 +337,7 @@ ENTRY(boot_level4_pgt)
330 .align 16 337 .align 16
331 .globl cpu_gdt_descr 338 .globl cpu_gdt_descr
332cpu_gdt_descr: 339cpu_gdt_descr:
333 .word gdt_end-cpu_gdt_table 340 .word gdt_end-cpu_gdt_table-1
334gdt: 341gdt:
335 .quad cpu_gdt_table 342 .quad cpu_gdt_table
336#ifdef CONFIG_SMP 343#ifdef CONFIG_SMP
@@ -345,7 +352,8 @@ gdt:
345 * Also sysret mandates a special GDT layout 352 * Also sysret mandates a special GDT layout
346 */ 353 */
347 354
348.align PAGE_SIZE 355 .section .data.page_aligned, "aw"
356 .align PAGE_SIZE
349 357
350/* The TLS descriptors are currently at a different place compared to i386. 358/* The TLS descriptors are currently at a different place compared to i386.
351 Hopefully nobody expects them at a fixed place (Wine?) */ 359 Hopefully nobody expects them at a fixed place (Wine?) */
@@ -371,9 +379,12 @@ gdt_end:
371 /* zero the remaining page */ 379 /* zero the remaining page */
372 .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 380 .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
373 381
374ENTRY(idt_table) 382 .section .bss, "aw", @nobits
375 .rept 256 383 .align L1_CACHE_BYTES
376 .quad 0 384ENTRY(idt_table)
377 .quad 0 385 .skip 256 * 16
378 .endr
379 386
387 .section .bss.page_aligned, "aw", @nobits
388 .align PAGE_SIZE
389ENTRY(empty_zero_page)
390 .skip PAGE_SIZE
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 4282d72b2a26..9cc7031b7151 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -30,6 +30,9 @@
30#include <linux/mc146818rtc.h> 30#include <linux/mc146818rtc.h>
31#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/sysdev.h> 32#include <linux/sysdev.h>
33#ifdef CONFIG_ACPI
34#include <acpi/acpi_bus.h>
35#endif
33 36
34#include <asm/io.h> 37#include <asm/io.h>
35#include <asm/smp.h> 38#include <asm/smp.h>
@@ -47,6 +50,8 @@ static int no_timer_check;
47 50
48int disable_timer_pin_1 __initdata; 51int disable_timer_pin_1 __initdata;
49 52
53int timer_over_8254 __initdata = 0;
54
50/* Where if anywhere is the i8259 connect in external int mode */ 55/* Where if anywhere is the i8259 connect in external int mode */
51static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 56static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
52 57
@@ -248,10 +253,36 @@ static int __init enable_ioapic_setup(char *str)
248__setup("noapic", disable_ioapic_setup); 253__setup("noapic", disable_ioapic_setup);
249__setup("apic", enable_ioapic_setup); 254__setup("apic", enable_ioapic_setup);
250 255
256static int __init setup_disable_8254_timer(char *s)
257{
258 timer_over_8254 = -1;
259 return 1;
260}
261static int __init setup_enable_8254_timer(char *s)
262{
263 timer_over_8254 = 2;
264 return 1;
265}
266
267__setup("disable_8254_timer", setup_disable_8254_timer);
268__setup("enable_8254_timer", setup_enable_8254_timer);
269
251#include <asm/pci-direct.h> 270#include <asm/pci-direct.h>
252#include <linux/pci_ids.h> 271#include <linux/pci_ids.h>
253#include <linux/pci.h> 272#include <linux/pci.h>
254 273
274
275#ifdef CONFIG_ACPI
276
277static int nvidia_hpet_detected __initdata;
278
279static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
280{
281 nvidia_hpet_detected = 1;
282 return 0;
283}
284#endif
285
255/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC 286/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
256 off. Check for an Nvidia or VIA PCI bridge and turn it off. 287 off. Check for an Nvidia or VIA PCI bridge and turn it off.
257 Use pci direct infrastructure because this runs before the PCI subsystem. 288 Use pci direct infrastructure because this runs before the PCI subsystem.
@@ -260,6 +291,8 @@ __setup("apic", enable_ioapic_setup);
260 291
261 And another hack to disable the IOMMU on VIA chipsets. 292 And another hack to disable the IOMMU on VIA chipsets.
262 293
294 ... and others. Really should move this somewhere else.
295
263 Kludge-O-Rama. */ 296 Kludge-O-Rama. */
264void __init check_ioapic(void) 297void __init check_ioapic(void)
265{ 298{
@@ -289,31 +322,43 @@ void __init check_ioapic(void)
289 force_iommu) && 322 force_iommu) &&
290 !iommu_aperture_allowed) { 323 !iommu_aperture_allowed) {
291 printk(KERN_INFO 324 printk(KERN_INFO
292 "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n"); 325 "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
293 iommu_aperture_disabled = 1; 326 iommu_aperture_disabled = 1;
294 } 327 }
295#endif 328#endif
296 return; 329 return;
297 case PCI_VENDOR_ID_NVIDIA: 330 case PCI_VENDOR_ID_NVIDIA:
298#ifdef CONFIG_ACPI 331#ifdef CONFIG_ACPI
299 /* All timer overrides on Nvidia 332 /*
300 seem to be wrong. Skip them. */ 333 * All timer overrides on Nvidia are
301 acpi_skip_timer_override = 1; 334 * wrong unless HPET is enabled.
302 printk(KERN_INFO 335 */
303 "Nvidia board detected. Ignoring ACPI timer override.\n"); 336 nvidia_hpet_detected = 0;
337 acpi_table_parse(ACPI_HPET,
338 nvidia_hpet_check);
339 if (nvidia_hpet_detected == 0) {
340 acpi_skip_timer_override = 1;
341 printk(KERN_INFO "Nvidia board "
342 "detected. Ignoring ACPI "
343 "timer override.\n");
344 }
304#endif 345#endif
305 /* RED-PEN skip them on mptables too? */ 346 /* RED-PEN skip them on mptables too? */
306 return; 347 return;
348
349 /* This should be actually default, but
350 for 2.6.16 let's do it for ATI only where
351 it's really needed. */
307 case PCI_VENDOR_ID_ATI: 352 case PCI_VENDOR_ID_ATI:
308 if (apic_runs_main_timer != 0) 353 if (timer_over_8254 == 1) {
309 break; 354 timer_over_8254 = 0;
310 printk(KERN_INFO 355 printk(KERN_INFO
311 "ATI board detected. Using APIC/PM timer.\n"); 356 "ATI board detected. Disabling timer routing over 8254.\n");
312 apic_runs_main_timer = 1; 357 }
313 nohpet = 1;
314 return; 358 return;
315 } 359 }
316 360
361
317 /* No multi-function device? */ 362 /* No multi-function device? */
318 type = read_pci_config_byte(num,slot,func, 363 type = read_pci_config_byte(num,slot,func,
319 PCI_HEADER_TYPE); 364 PCI_HEADER_TYPE);
@@ -1752,11 +1797,15 @@ static inline void unlock_ExtINT_logic(void)
1752 spin_unlock_irqrestore(&ioapic_lock, flags); 1797 spin_unlock_irqrestore(&ioapic_lock, flags);
1753} 1798}
1754 1799
1800int timer_uses_ioapic_pin_0;
1801
1755/* 1802/*
1756 * This code may look a bit paranoid, but it's supposed to cooperate with 1803 * This code may look a bit paranoid, but it's supposed to cooperate with
1757 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 1804 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1758 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 1805 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1759 * fanatically on his truly buggy board. 1806 * fanatically on his truly buggy board.
1807 *
1808 * FIXME: really need to revamp this for modern platforms only.
1760 */ 1809 */
1761static inline void check_timer(void) 1810static inline void check_timer(void)
1762{ 1811{
@@ -1779,13 +1828,17 @@ static inline void check_timer(void)
1779 */ 1828 */
1780 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1829 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1781 init_8259A(1); 1830 init_8259A(1);
1782 enable_8259A_irq(0); 1831 if (timer_over_8254 > 0)
1832 enable_8259A_irq(0);
1783 1833
1784 pin1 = find_isa_irq_pin(0, mp_INT); 1834 pin1 = find_isa_irq_pin(0, mp_INT);
1785 apic1 = find_isa_irq_apic(0, mp_INT); 1835 apic1 = find_isa_irq_apic(0, mp_INT);
1786 pin2 = ioapic_i8259.pin; 1836 pin2 = ioapic_i8259.pin;
1787 apic2 = ioapic_i8259.apic; 1837 apic2 = ioapic_i8259.apic;
1788 1838
1839 if (pin1 == 0)
1840 timer_uses_ioapic_pin_0 = 1;
1841
1789 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1842 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1790 vector, apic1, pin1, apic2, pin2); 1843 vector, apic1, pin1, apic2, pin2);
1791 1844
@@ -1820,7 +1873,7 @@ static inline void check_timer(void)
1820 */ 1873 */
1821 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 1874 setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1822 if (timer_irq_works()) { 1875 if (timer_irq_works()) {
1823 printk("works.\n"); 1876 apic_printk(APIC_VERBOSE," works.\n");
1824 nmi_watchdog_default(); 1877 nmi_watchdog_default();
1825 if (nmi_watchdog == NMI_IO_APIC) { 1878 if (nmi_watchdog == NMI_IO_APIC) {
1826 setup_nmi(); 1879 setup_nmi();
@@ -1832,9 +1885,9 @@ static inline void check_timer(void)
1832 */ 1885 */
1833 clear_IO_APIC_pin(apic2, pin2); 1886 clear_IO_APIC_pin(apic2, pin2);
1834 } 1887 }
1835 printk(" failed.\n"); 1888 apic_printk(APIC_VERBOSE," failed.\n");
1836 1889
1837 if (nmi_watchdog) { 1890 if (nmi_watchdog == NMI_IO_APIC) {
1838 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1891 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
1839 nmi_watchdog = 0; 1892 nmi_watchdog = 0;
1840 } 1893 }
@@ -1847,7 +1900,7 @@ static inline void check_timer(void)
1847 enable_8259A_irq(0); 1900 enable_8259A_irq(0);
1848 1901
1849 if (timer_irq_works()) { 1902 if (timer_irq_works()) {
1850 apic_printk(APIC_QUIET, " works.\n"); 1903 apic_printk(APIC_VERBOSE," works.\n");
1851 return; 1904 return;
1852 } 1905 }
1853 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); 1906 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index 30d2a1e545fe..d8bd0b345b1e 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -38,9 +38,8 @@ int show_interrupts(struct seq_file *p, void *v)
38 38
39 if (i == 0) { 39 if (i == 0) {
40 seq_printf(p, " "); 40 seq_printf(p, " ");
41 for (j=0; j<NR_CPUS; j++) 41 for_each_online_cpu(j)
42 if (cpu_online(j)) 42 seq_printf(p, "CPU%d ",j);
43 seq_printf(p, "CPU%d ",j);
44 seq_putc(p, '\n'); 43 seq_putc(p, '\n');
45 } 44 }
46 45
@@ -53,10 +52,8 @@ int show_interrupts(struct seq_file *p, void *v)
53#ifndef CONFIG_SMP 52#ifndef CONFIG_SMP
54 seq_printf(p, "%10u ", kstat_irqs(i)); 53 seq_printf(p, "%10u ", kstat_irqs(i));
55#else 54#else
56 for (j=0; j<NR_CPUS; j++) 55 for_each_online_cpu(j)
57 if (cpu_online(j)) 56 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
58 seq_printf(p, "%10u ",
59 kstat_cpu(j).irqs[i]);
60#endif 57#endif
61 seq_printf(p, " %14s", irq_desc[i].handler->typename); 58 seq_printf(p, " %14s", irq_desc[i].handler->typename);
62 59
@@ -68,15 +65,13 @@ skip:
68 spin_unlock_irqrestore(&irq_desc[i].lock, flags); 65 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
69 } else if (i == NR_IRQS) { 66 } else if (i == NR_IRQS) {
70 seq_printf(p, "NMI: "); 67 seq_printf(p, "NMI: ");
71 for (j = 0; j < NR_CPUS; j++) 68 for_each_online_cpu(j)
72 if (cpu_online(j)) 69 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
73 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
74 seq_putc(p, '\n'); 70 seq_putc(p, '\n');
75#ifdef CONFIG_X86_LOCAL_APIC 71#ifdef CONFIG_X86_LOCAL_APIC
76 seq_printf(p, "LOC: "); 72 seq_printf(p, "LOC: ");
77 for (j = 0; j < NR_CPUS; j++) 73 for_each_online_cpu(j)
78 if (cpu_online(j)) 74 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
79 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
80 seq_putc(p, '\n'); 75 seq_putc(p, '\n');
81#endif 76#endif
82 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 77 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 8b866a8572cf..fa1d19ca700a 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -37,10 +37,12 @@
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/preempt.h> 39#include <linux/preempt.h>
40#include <linux/module.h>
40 41
41#include <asm/cacheflush.h> 42#include <asm/cacheflush.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/kdebug.h> 44#include <asm/kdebug.h>
45#include <asm/uaccess.h>
44 46
45void jprobe_return_end(void); 47void jprobe_return_end(void);
46static void __kprobes arch_copy_kprobe(struct kprobe *p); 48static void __kprobes arch_copy_kprobe(struct kprobe *p);
@@ -51,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
51/* 53/*
52 * returns non-zero if opcode modifies the interrupt flag. 54 * returns non-zero if opcode modifies the interrupt flag.
53 */ 55 */
54static inline int is_IF_modifier(kprobe_opcode_t *insn) 56static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
55{ 57{
56 switch (*insn) { 58 switch (*insn) {
57 case 0xfa: /* cli */ 59 case 0xfa: /* cli */
@@ -82,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
82 * If it does, return the address of the 32-bit displacement word. 84 * If it does, return the address of the 32-bit displacement word.
83 * If not, return null. 85 * If not, return null.
84 */ 86 */
85static inline s32 *is_riprel(u8 *insn) 87static s32 __kprobes *is_riprel(u8 *insn)
86{ 88{
87#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ 89#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
88 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 90 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -222,12 +224,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
222 224
223void __kprobes arch_remove_kprobe(struct kprobe *p) 225void __kprobes arch_remove_kprobe(struct kprobe *p)
224{ 226{
225 down(&kprobe_mutex); 227 mutex_lock(&kprobe_mutex);
226 free_insn_slot(p->ainsn.insn); 228 free_insn_slot(p->ainsn.insn);
227 up(&kprobe_mutex); 229 mutex_unlock(&kprobe_mutex);
228} 230}
229 231
230static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) 232static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
231{ 233{
232 kcb->prev_kprobe.kp = kprobe_running(); 234 kcb->prev_kprobe.kp = kprobe_running();
233 kcb->prev_kprobe.status = kcb->kprobe_status; 235 kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -235,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
235 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; 237 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
236} 238}
237 239
238static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) 240static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
239{ 241{
240 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 242 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
241 kcb->kprobe_status = kcb->prev_kprobe.status; 243 kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -243,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
243 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; 245 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
244} 246}
245 247
246static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 248static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
247 struct kprobe_ctlblk *kcb) 249 struct kprobe_ctlblk *kcb)
248{ 250{
249 __get_cpu_var(current_kprobe) = p; 251 __get_cpu_var(current_kprobe) = p;
@@ -512,13 +514,13 @@ static void __kprobes resume_execution(struct kprobe *p,
512 *tos = orig_rip + (*tos - copy_rip); 514 *tos = orig_rip + (*tos - copy_rip);
513 break; 515 break;
514 case 0xff: 516 case 0xff:
515 if ((*insn & 0x30) == 0x10) { 517 if ((insn[1] & 0x30) == 0x10) {
516 /* call absolute, indirect */ 518 /* call absolute, indirect */
517 /* Fix return addr; rip is correct. */ 519 /* Fix return addr; rip is correct. */
518 next_rip = regs->rip; 520 next_rip = regs->rip;
519 *tos = orig_rip + (*tos - copy_rip); 521 *tos = orig_rip + (*tos - copy_rip);
520 } else if (((*insn & 0x31) == 0x20) || /* jmp near, absolute indirect */ 522 } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
521 ((*insn & 0x31) == 0x21)) { /* jmp far, absolute indirect */ 523 ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
522 /* rip is correct. */ 524 /* rip is correct. */
523 next_rip = regs->rip; 525 next_rip = regs->rip;
524 } 526 }
@@ -578,16 +580,62 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
578{ 580{
579 struct kprobe *cur = kprobe_running(); 581 struct kprobe *cur = kprobe_running();
580 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 582 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
583 const struct exception_table_entry *fixup;
581 584
582 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) 585 switch(kcb->kprobe_status) {
583 return 1; 586 case KPROBE_HIT_SS:
584 587 case KPROBE_REENTER:
585 if (kcb->kprobe_status & KPROBE_HIT_SS) { 588 /*
586 resume_execution(cur, regs, kcb); 589 * We are here because the instruction being single
590 * stepped caused a page fault. We reset the current
591 * kprobe and the rip points back to the probe address
592 * and allow the page fault handler to continue as a
593 * normal page fault.
594 */
595 regs->rip = (unsigned long)cur->addr;
587 regs->eflags |= kcb->kprobe_old_rflags; 596 regs->eflags |= kcb->kprobe_old_rflags;
588 597 if (kcb->kprobe_status == KPROBE_REENTER)
589 reset_current_kprobe(); 598 restore_previous_kprobe(kcb);
599 else
600 reset_current_kprobe();
590 preempt_enable_no_resched(); 601 preempt_enable_no_resched();
602 break;
603 case KPROBE_HIT_ACTIVE:
604 case KPROBE_HIT_SSDONE:
605 /*
606 * We increment the nmissed count for accounting,
607 * we can also use npre/npostfault count for accouting
608 * these specific fault cases.
609 */
610 kprobes_inc_nmissed_count(cur);
611
612 /*
613 * We come here because instructions in the pre/post
614 * handler caused the page_fault, this could happen
615 * if handler tries to access user space by
616 * copy_from_user(), get_user() etc. Let the
617 * user-specified handler try to fix it first.
618 */
619 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
620 return 1;
621
622 /*
623 * In case the user-specified fault handler returned
624 * zero, try to fix up.
625 */
626 fixup = search_exception_tables(regs->rip);
627 if (fixup) {
628 regs->rip = fixup->fixup;
629 return 1;
630 }
631
632 /*
633 * fixup() could not handle it,
634 * Let do_page_fault() fix it.
635 */
636 break;
637 default:
638 break;
591 } 639 }
592 return 0; 640 return 0;
593} 641}
@@ -601,6 +649,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
601 struct die_args *args = (struct die_args *)data; 649 struct die_args *args = (struct die_args *)data;
602 int ret = NOTIFY_DONE; 650 int ret = NOTIFY_DONE;
603 651
652 if (args->regs && user_mode(args->regs))
653 return ret;
654
604 switch (val) { 655 switch (val) {
605 case DIE_INT3: 656 case DIE_INT3:
606 if (kprobe_handler(args->regs)) 657 if (kprobe_handler(args->regs))
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 89fab51e20f4..25ac8a3faae6 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -140,7 +140,7 @@ static void load_segments(void)
140 "\tmovl %0,%%ss\n" 140 "\tmovl %0,%%ss\n"
141 "\tmovl %0,%%fs\n" 141 "\tmovl %0,%%fs\n"
142 "\tmovl %0,%%gs\n" 142 "\tmovl %0,%%gs\n"
143 : : "a" (__KERNEL_DS) 143 : : "a" (__KERNEL_DS) : "memory"
144 ); 144 );
145} 145}
146 146
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index b8b9529fa89e..c69fc43cee7b 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -29,6 +29,8 @@
29#define MISC_MCELOG_MINOR 227 29#define MISC_MCELOG_MINOR 227
30#define NR_BANKS 6 30#define NR_BANKS 6
31 31
32atomic_t mce_entry;
33
32static int mce_dont_init; 34static int mce_dont_init;
33 35
34/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, 36/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
@@ -139,8 +141,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
139 141
140static int mce_available(struct cpuinfo_x86 *c) 142static int mce_available(struct cpuinfo_x86 *c)
141{ 143{
142 return test_bit(X86_FEATURE_MCE, &c->x86_capability) && 144 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
143 test_bit(X86_FEATURE_MCA, &c->x86_capability);
144} 145}
145 146
146static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) 147static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
@@ -173,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
173 int i; 174 int i;
174 int panicm_found = 0; 175 int panicm_found = 0;
175 176
177 atomic_inc(&mce_entry);
178
176 if (regs) 179 if (regs)
177 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); 180 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
178 if (!banks) 181 if (!banks)
179 return; 182 goto out2;
180 183
181 memset(&m, 0, sizeof(struct mce)); 184 memset(&m, 0, sizeof(struct mce));
182 m.cpu = safe_smp_processor_id(); 185 m.cpu = safe_smp_processor_id();
@@ -267,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
267 out: 270 out:
268 /* Last thing done in the machine check exception to clear state. */ 271 /* Last thing done in the machine check exception to clear state. */
269 wrmsrl(MSR_IA32_MCG_STATUS, 0); 272 wrmsrl(MSR_IA32_MCG_STATUS, 0);
273 out2:
274 atomic_dec(&mce_entry);
270} 275}
271 276
272/* 277/*
@@ -502,7 +507,7 @@ static struct miscdevice mce_log_device = {
502static int __init mcheck_disable(char *str) 507static int __init mcheck_disable(char *str)
503{ 508{
504 mce_dont_init = 1; 509 mce_dont_init = 1;
505 return 0; 510 return 1;
506} 511}
507 512
508/* mce=off disables machine check. Note you can reenable it later 513/* mce=off disables machine check. Note you can reenable it later
@@ -522,7 +527,7 @@ static int __init mcheck_enable(char *str)
522 get_option(&str, &tolerant); 527 get_option(&str, &tolerant);
523 else 528 else
524 printk("mce= argument %s ignored. Please use /sys", str); 529 printk("mce= argument %s ignored. Please use /sys", str);
525 return 0; 530 return 1;
526} 531}
527 532
528__setup("nomce", mcheck_disable); 533__setup("nomce", mcheck_disable);
@@ -624,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
624#endif 629#endif
625 630
626/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 631/* Get notified when a cpu comes on/off. Be hotplug friendly. */
627static __cpuinit int 632static int
628mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 633mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
629{ 634{
630 unsigned int cpu = (unsigned long)hcpu; 635 unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d3ad7d81266d..d13b241ad094 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu)
482#endif 482#endif
483 483
484/* get notified when a cpu comes on/off */ 484/* get notified when a cpu comes on/off */
485static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, 485static int threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu) 486 unsigned long action, void *hcpu)
487{ 487{
488 /* cpu was unsigned int to begin with */ 488 /* cpu was unsigned int to begin with */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index dc49bfb6db0a..083da7e606b1 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -106,11 +106,11 @@ static int __init mpf_checksum(unsigned char *mp, int len)
106 return sum & 0xFF; 106 return sum & 0xFF;
107} 107}
108 108
109static void __init MP_processor_info (struct mpc_config_processor *m) 109static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
110{ 110{
111 int cpu; 111 int cpu;
112 unsigned char ver; 112 unsigned char ver;
113 static int found_bsp=0; 113 cpumask_t tmp_map;
114 114
115 if (!(m->mpc_cpuflag & CPU_ENABLED)) { 115 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
116 disabled_cpus++; 116 disabled_cpus++;
@@ -133,8 +133,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
133 return; 133 return;
134 } 134 }
135 135
136 cpu = num_processors++; 136 num_processors++;
137 137 cpus_complement(tmp_map, cpu_present_map);
138 cpu = first_cpu(tmp_map);
139
138#if MAX_APICS < 255 140#if MAX_APICS < 255
139 if ((int)m->mpc_apicid > MAX_APICS) { 141 if ((int)m->mpc_apicid > MAX_APICS) {
140 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 142 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
@@ -160,12 +162,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
160 * entry is BSP, and so on. 162 * entry is BSP, and so on.
161 */ 163 */
162 cpu = 0; 164 cpu = 0;
163 165 }
164 bios_cpu_apicid[0] = m->mpc_apicid;
165 x86_cpu_to_apicid[0] = m->mpc_apicid;
166 found_bsp = 1;
167 } else
168 cpu = num_processors - found_bsp;
169 bios_cpu_apicid[cpu] = m->mpc_apicid; 166 bios_cpu_apicid[cpu] = m->mpc_apicid;
170 x86_cpu_to_apicid[cpu] = m->mpc_apicid; 167 x86_cpu_to_apicid[cpu] = m->mpc_apicid;
171 168
@@ -288,9 +285,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
288 285
289 memcpy(str,mpc->mpc_productid,12); 286 memcpy(str,mpc->mpc_productid,12);
290 str[12]=0; 287 str[12]=0;
291 printk(KERN_INFO "Product ID: %s ",str); 288 printk("Product ID: %s ",str);
292 289
293 printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); 290 printk("APIC at: 0x%X\n",mpc->mpc_lapic);
294 291
295 /* save the local APIC address, it might be non-default */ 292 /* save the local APIC address, it might be non-default */
296 if (!acpi_lapic) 293 if (!acpi_lapic)
@@ -691,7 +688,7 @@ void __init mp_register_lapic_address (
691} 688}
692 689
693 690
694void __init mp_register_lapic ( 691void __cpuinit mp_register_lapic (
695 u8 id, 692 u8 id,
696 u8 enabled) 693 u8 enabled)
697{ 694{
@@ -971,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
971 */ 968 */
972 int irq = gsi; 969 int irq = gsi;
973 if (gsi < MAX_GSI_NUM) { 970 if (gsi < MAX_GSI_NUM) {
974 if (gsi > 15) 971 /*
972 * Retain the VIA chipset work-around (gsi > 15), but
973 * avoid a problem where the 8254 timer (IRQ0) is setup
974 * via an override (so it's not on pin 0 of the ioapic),
975 * and at the same time, the pin 0 interrupt is a PCI
976 * type. The gsi > 15 test could cause these two pins
977 * to be shared as IRQ0, and they are not shareable.
978 * So test for this condition, and if necessary, avoid
979 * the pin collision.
980 */
981 if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
975 gsi = pci_irq++; 982 gsi = pci_irq++;
976 /* 983 /*
977 * Don't assign IRQ used by ACPI SCI 984 * Don't assign IRQ used by ACPI SCI
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 8be407a1f62d..4e6357fe0ec3 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -34,6 +34,7 @@
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/kdebug.h> 35#include <asm/kdebug.h>
36#include <asm/local.h> 36#include <asm/local.h>
37#include <asm/mce.h>
37 38
38/* 39/*
39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 40 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -162,9 +163,7 @@ int __init check_nmi_watchdog (void)
162 local_irq_enable(); 163 local_irq_enable();
163 mdelay((10*1000)/nmi_hz); // wait 10 ticks 164 mdelay((10*1000)/nmi_hz); // wait 10 ticks
164 165
165 for (cpu = 0; cpu < NR_CPUS; cpu++) { 166 for_each_online_cpu(cpu) {
166 if (!cpu_online(cpu))
167 continue;
168 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { 167 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
169 endflag = 1; 168 endflag = 1;
170 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 169 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
@@ -236,6 +235,7 @@ static void enable_lapic_nmi_watchdog(void)
236{ 235{
237 if (nmi_active < 0) { 236 if (nmi_active < 0) {
238 nmi_watchdog = NMI_LOCAL_APIC; 237 nmi_watchdog = NMI_LOCAL_APIC;
238 touch_nmi_watchdog();
239 setup_apic_nmi_watchdog(); 239 setup_apic_nmi_watchdog();
240 } 240 }
241} 241}
@@ -456,15 +456,17 @@ static DEFINE_PER_CPU(int, nmi_touch);
456 456
457void touch_nmi_watchdog (void) 457void touch_nmi_watchdog (void)
458{ 458{
459 int i; 459 if (nmi_watchdog > 0) {
460 unsigned cpu;
460 461
461 /* 462 /*
462 * Tell other CPUs to reset their alert counters. We cannot 463 * Tell other CPUs to reset their alert counters. We cannot
463 * do it ourselves because the alert count increase is not 464 * do it ourselves because the alert count increase is not
464 * atomic. 465 * atomic.
465 */ 466 */
466 for (i = 0; i < NR_CPUS; i++) 467 for_each_present_cpu (cpu)
467 per_cpu(nmi_touch, i) = 1; 468 per_cpu(nmi_touch, cpu) = 1;
469 }
468 470
469 touch_softlockup_watchdog(); 471 touch_softlockup_watchdog();
470} 472}
@@ -479,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
479 __get_cpu_var(nmi_touch) = 0; 481 __get_cpu_var(nmi_touch) = 0;
480 touched = 1; 482 touched = 1;
481 } 483 }
484#ifdef CONFIG_X86_MCE
485 /* Could check oops_in_progress here too, but it's safer
486 not to */
487 if (atomic_read(&mce_entry) > 0)
488 touched = 1;
489#endif
482 if (!touched && __get_cpu_var(last_irq_sum) == sum) { 490 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
483 /* 491 /*
484 * Ayiee, looks like this CPU is stuck ... 492 * Ayiee, looks like this CPU is stuck ...
@@ -533,6 +541,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
533 541
534void set_nmi_callback(nmi_callback_t callback) 542void set_nmi_callback(nmi_callback_t callback)
535{ 543{
544 vmalloc_sync_all();
536 rcu_assign_pointer(nmi_callback, callback); 545 rcu_assign_pointer(nmi_callback, callback);
537} 546}
538 547
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 4ed391edd47a..a9275c9557cf 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -48,10 +48,16 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
48{ 48{
49 struct page *page; 49 struct page *page;
50 int node; 50 int node;
51#ifdef CONFIG_PCI
51 if (dev->bus == &pci_bus_type) 52 if (dev->bus == &pci_bus_type)
52 node = pcibus_to_node(to_pci_dev(dev)->bus); 53 node = pcibus_to_node(to_pci_dev(dev)->bus);
53 else 54 else
55#endif
54 node = numa_node_id(); 56 node = numa_node_id();
57
58 if (node < first_node(node_online_map))
59 node = first_node(node_online_map);
60
55 page = alloc_pages_node(node, gfp, order); 61 page = alloc_pages_node(node, gfp, order);
56 return page ? page_address(page) : NULL; 62 return page ? page_address(page) : NULL;
57} 63}
@@ -73,6 +79,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
73 if (dma_mask == 0) 79 if (dma_mask == 0)
74 dma_mask = 0xffffffff; 80 dma_mask = 0xffffffff;
75 81
82 /* Don't invoke OOM killer */
83 gfp |= __GFP_NORETRY;
84
76 /* Kludge to make it bug-to-bug compatible with i386. i386 85 /* Kludge to make it bug-to-bug compatible with i386. i386
77 uses the normal dma_mask for alloc_coherent. */ 86 uses the normal dma_mask for alloc_coherent. */
78 dma_mask &= *dev->dma_mask; 87 dma_mask &= *dev->dma_mask;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 2fe23a6c361b..82a7c9bfdfa0 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -65,9 +65,7 @@ static u32 gart_unmapped_entry;
65 65
66#define for_all_nb(dev) \ 66#define for_all_nb(dev) \
67 dev = NULL; \ 67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\ 68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69 if (dev->bus->number == 0 && \
70 (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
71 69
72static struct pci_dev *northbridges[MAX_NB]; 70static struct pci_dev *northbridges[MAX_NB];
73static u32 northbridge_flush_word[MAX_NB]; 71static u32 northbridge_flush_word[MAX_NB];
@@ -114,10 +112,6 @@ static unsigned long alloc_iommu(int size)
114static void free_iommu(unsigned long offset, int size) 112static void free_iommu(unsigned long offset, int size)
115{ 113{
116 unsigned long flags; 114 unsigned long flags;
117 if (size == 1) {
118 clear_bit(offset, iommu_gart_bitmap);
119 return;
120 }
121 spin_lock_irqsave(&iommu_bitmap_lock, flags); 115 spin_lock_irqsave(&iommu_bitmap_lock, flags);
122 __clear_bit_string(iommu_gart_bitmap, offset, size); 116 __clear_bit_string(iommu_gart_bitmap, offset, size);
123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 117 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -148,9 +142,12 @@ static void flush_gart(struct device *dev)
148 if (!northbridges[i]) 142 if (!northbridges[i])
149 continue; 143 continue;
150 /* Make sure the hardware actually executed the flush. */ 144 /* Make sure the hardware actually executed the flush. */
151 do { 145 for (;;) {
152 pci_read_config_dword(northbridges[i], 0x9c, &w); 146 pci_read_config_dword(northbridges[i], 0x9c, &w);
153 } while (w & 1); 147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
154 } 151 }
155 if (!flushed) 152 if (!flushed)
156 printk("nothing to flush?\n"); 153 printk("nothing to flush?\n");
@@ -228,11 +225,6 @@ static inline int need_iommu(struct device *dev, unsigned long addr, size_t size
228 int mmu = high; 225 int mmu = high;
229 if (force_iommu) 226 if (force_iommu)
230 mmu = 1; 227 mmu = 1;
231 if (no_iommu) {
232 if (high)
233 panic("PCI-DMA: high address but no IOMMU.\n");
234 mmu = 0;
235 }
236 return mmu; 228 return mmu;
237} 229}
238 230
@@ -241,11 +233,6 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
241 u64 mask = *dev->dma_mask; 233 u64 mask = *dev->dma_mask;
242 int high = addr + size >= mask; 234 int high = addr + size >= mask;
243 int mmu = high; 235 int mmu = high;
244 if (no_iommu) {
245 if (high)
246 panic("PCI-DMA: high address but no IOMMU.\n");
247 mmu = 0;
248 }
249 return mmu; 236 return mmu;
250} 237}
251 238
@@ -379,7 +366,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
379 SET_LEAK(iommu_page); 366 SET_LEAK(iommu_page);
380 addr += PAGE_SIZE; 367 addr += PAGE_SIZE;
381 iommu_page++; 368 iommu_page++;
382 } 369 }
383 } 370 }
384 BUG_ON(iommu_page - iommu_start != pages); 371 BUG_ON(iommu_page - iommu_start != pages);
385 return 0; 372 return 0;
@@ -634,28 +621,30 @@ static int __init pci_iommu_init(void)
634 (agp_copy_info(agp_bridge, &info) < 0); 621 (agp_copy_info(agp_bridge, &info) < 0);
635#endif 622#endif
636 623
637 if (swiotlb) { 624 if (swiotlb)
638 no_iommu = 1;
639 return -1; 625 return -1;
640 } 626
641
642 if (no_iommu || 627 if (no_iommu ||
643 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 628 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
644 !iommu_aperture || 629 !iommu_aperture ||
645 (no_agp && init_k8_gatt(&info) < 0)) { 630 (no_agp && init_k8_gatt(&info) < 0)) {
646 no_iommu = 1;
647 no_iommu_init();
648 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); 631 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
649 if (end_pfn > MAX_DMA32_PFN) { 632 if (end_pfn > MAX_DMA32_PFN) {
650 printk(KERN_ERR "WARNING more than 4GB of memory " 633 printk(KERN_ERR "WARNING more than 4GB of memory "
651 "but IOMMU not compiled in.\n" 634 "but IOMMU not available.\n"
652 KERN_ERR "WARNING 32bit PCI may malfunction.\n" 635 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
653 KERN_ERR "You might want to enable "
654 "CONFIG_GART_IOMMU\n");
655 } 636 }
656 return -1; 637 return -1;
657 } 638 }
658 639
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 }
647
659 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
660 aper_size = info.aper_size * 1024 * 1024; 649 aper_size = info.aper_size * 1024 * 1024;
661 iommu_size = check_iommu_size(info.aper_base, aper_size); 650 iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 44adcc2d5e5b..1f6ecc62061d 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -12,9 +12,10 @@ static int
12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
13{ 13{
14 if (hwdev && bus + size > *hwdev->dma_mask) { 14 if (hwdev && bus + size > *hwdev->dma_mask) {
15 printk(KERN_ERR 15 if (*hwdev->dma_mask >= 0xffffffffULL)
16 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", 16 printk(KERN_ERR
17 name, (long long)bus, size, (long long)*hwdev->dma_mask); 17 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
18 name, (long long)bus, size, (long long)*hwdev->dma_mask);
18 return 0; 19 return 0;
19 } 20 }
20 return 1; 21 return 1;
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
index 5c51d10408a6..bf421ed26808 100644
--- a/arch/x86_64/kernel/pmtimer.c
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -68,7 +68,7 @@ int pmtimer_mark_offset(void)
68 offset_delay = delta % (USEC_PER_SEC / HZ); 68 offset_delay = delta % (USEC_PER_SEC / HZ);
69 69
70 rdtscll(tsc); 70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz; 71 vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000;
72 72
73 /* don't calculate delay for first run, 73 /* don't calculate delay for first run,
74 or if we've got less then a tick */ 74 or if we've got less then a tick */
@@ -86,7 +86,7 @@ static unsigned pmtimer_wait_tick(void)
86 for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; 86 for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
87 a == b; 87 a == b;
88 b = inl(pmtmr_ioport) & ACPI_PM_MASK) 88 b = inl(pmtmr_ioport) & ACPI_PM_MASK)
89 ; 89 cpu_relax();
90 return b; 90 return b;
91} 91}
92 92
@@ -97,6 +97,7 @@ void pmtimer_wait(unsigned us)
97 a = pmtimer_wait_tick(); 97 a = pmtimer_wait_tick();
98 do { 98 do {
99 b = inl(pmtmr_ioport); 99 b = inl(pmtmr_ioport);
100 cpu_relax();
100 } while (cyc2us(b - a) < us); 101 } while (cyc2us(b - a) < us);
101} 102}
102 103
@@ -120,7 +121,7 @@ unsigned int do_gettimeoffset_pm(void)
120static int __init nopmtimer_setup(char *s) 121static int __init nopmtimer_setup(char *s)
121{ 122{
122 pmtmr_ioport = 0; 123 pmtmr_ioport = 0;
123 return 0; 124 return 1;
124} 125}
125 126
126__setup("nopmtimer", nopmtimer_setup); 127__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 22a05dec81a2..fb903e65e079 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -35,8 +35,8 @@
35#include <linux/ptrace.h> 35#include <linux/ptrace.h>
36#include <linux/utsname.h> 36#include <linux/utsname.h>
37#include <linux/random.h> 37#include <linux/random.h>
38#include <linux/kprobes.h>
39#include <linux/notifier.h> 38#include <linux/notifier.h>
39#include <linux/kprobes.h>
40 40
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -66,24 +66,17 @@ EXPORT_SYMBOL(boot_option_idle_override);
66void (*pm_idle)(void); 66void (*pm_idle)(void);
67static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 67static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68 68
69static struct notifier_block *idle_notifier; 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70static DEFINE_SPINLOCK(idle_notifier_lock);
71 70
72void idle_notifier_register(struct notifier_block *n) 71void idle_notifier_register(struct notifier_block *n)
73{ 72{
74 unsigned long flags; 73 atomic_notifier_chain_register(&idle_notifier, n);
75 spin_lock_irqsave(&idle_notifier_lock, flags);
76 notifier_chain_register(&idle_notifier, n);
77 spin_unlock_irqrestore(&idle_notifier_lock, flags);
78} 74}
79EXPORT_SYMBOL_GPL(idle_notifier_register); 75EXPORT_SYMBOL_GPL(idle_notifier_register);
80 76
81void idle_notifier_unregister(struct notifier_block *n) 77void idle_notifier_unregister(struct notifier_block *n)
82{ 78{
83 unsigned long flags; 79 atomic_notifier_chain_unregister(&idle_notifier, n);
84 spin_lock_irqsave(&idle_notifier_lock, flags);
85 notifier_chain_unregister(&idle_notifier, n);
86 spin_unlock_irqrestore(&idle_notifier_lock, flags);
87} 80}
88EXPORT_SYMBOL(idle_notifier_unregister); 81EXPORT_SYMBOL(idle_notifier_unregister);
89 82
@@ -93,13 +86,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
93void enter_idle(void) 86void enter_idle(void)
94{ 87{
95 __get_cpu_var(idle_state) = CPU_IDLE; 88 __get_cpu_var(idle_state) = CPU_IDLE;
96 notifier_call_chain(&idle_notifier, IDLE_START, NULL); 89 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
97} 90}
98 91
99static void __exit_idle(void) 92static void __exit_idle(void)
100{ 93{
101 __get_cpu_var(idle_state) = CPU_NOT_IDLE; 94 __get_cpu_var(idle_state) = CPU_NOT_IDLE;
102 notifier_call_chain(&idle_notifier, IDLE_END, NULL); 95 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
103} 96}
104 97
105/* Called from interrupts to signify idle end */ 98/* Called from interrupts to signify idle end */
@@ -114,7 +107,7 @@ void exit_idle(void)
114 * We use this if we don't have any better 107 * We use this if we don't have any better
115 * idle routine.. 108 * idle routine..
116 */ 109 */
117void default_idle(void) 110static void default_idle(void)
118{ 111{
119 local_irq_enable(); 112 local_irq_enable();
120 113
@@ -353,13 +346,6 @@ void exit_thread(void)
353 struct task_struct *me = current; 346 struct task_struct *me = current;
354 struct thread_struct *t = &me->thread; 347 struct thread_struct *t = &me->thread;
355 348
356 /*
357 * Remove function-return probe instances associated with this task
358 * and put them back on the free list. Do not insert an exit probe for
359 * this function, it will be disabled by kprobe_flush_task if you do.
360 */
361 kprobe_flush_task(me);
362
363 if (me->thread.io_bitmap_ptr) { 349 if (me->thread.io_bitmap_ptr) {
364 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 350 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
365 351
@@ -508,7 +494,7 @@ out:
508/* 494/*
509 * This special macro can be used to load a debugging register 495 * This special macro can be used to load a debugging register
510 */ 496 */
511#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) 497#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
512 498
513/* 499/*
514 * switch_to(x,y) should switch tasks from x to y. 500 * switch_to(x,y) should switch tasks from x to y.
@@ -527,8 +513,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
527 int cpu = smp_processor_id(); 513 int cpu = smp_processor_id();
528 struct tss_struct *tss = &per_cpu(init_tss, cpu); 514 struct tss_struct *tss = &per_cpu(init_tss, cpu);
529 515
530 unlazy_fpu(prev_p);
531
532 /* 516 /*
533 * Reload esp0, LDT and the page table pointer: 517 * Reload esp0, LDT and the page table pointer:
534 */ 518 */
@@ -586,11 +570,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
586 } 570 }
587 571
588 /* 572 /*
589 * Switch the PDA context. 573 * Switch the PDA and FPU contexts.
590 */ 574 */
591 prev->userrsp = read_pda(oldrsp); 575 prev->userrsp = read_pda(oldrsp);
592 write_pda(oldrsp, next->userrsp); 576 write_pda(oldrsp, next->userrsp);
593 write_pda(pcurrent, next_p); 577 write_pda(pcurrent, next_p);
578
579 /* This must be here to ensure both math_state_restore() and
580 kernel_fpu_begin() work consistently.
581 And the AMD workaround requires it to be after DS reload. */
582 unlazy_fpu(prev_p);
594 write_pda(kernelstack, 583 write_pda(kernelstack,
595 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); 584 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
596 585
@@ -794,10 +783,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
794 } 783 }
795 case ARCH_GET_GS: { 784 case ARCH_GET_GS: {
796 unsigned long base; 785 unsigned long base;
786 unsigned gsindex;
797 if (task->thread.gsindex == GS_TLS_SEL) 787 if (task->thread.gsindex == GS_TLS_SEL)
798 base = read_32bit_tls(task, GS_TLS); 788 base = read_32bit_tls(task, GS_TLS);
799 else if (doit) 789 else if (doit) {
800 rdmsrl(MSR_KERNEL_GS_BASE, base); 790 asm("movl %%gs,%0" : "=r" (gsindex));
791 if (gsindex)
792 rdmsrl(MSR_KERNEL_GS_BASE, base);
793 else
794 base = task->thread.gs;
795 }
801 else 796 else
802 base = task->thread.gs; 797 base = task->thread.gs;
803 ret = put_user(base, (unsigned long __user *)addr); 798 ret = put_user(base, (unsigned long __user *)addr);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index 53205622351c..2d50024c9f30 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -274,11 +274,6 @@ static int putreg(struct task_struct *child,
274 return -EIO; 274 return -EIO;
275 value &= 0xffff; 275 value &= 0xffff;
276 break; 276 break;
277 case offsetof(struct user_regs_struct, rip):
278 /* Check if the new RIP address is canonical */
279 if (value >= TASK_SIZE_OF(child))
280 return -EIO;
281 break;
282 } 277 }
283 put_stack_long(child, regno - sizeof(struct pt_regs), value); 278 put_stack_long(child, regno - sizeof(struct pt_regs), value);
284 return 0; 279 return 0;
@@ -420,9 +415,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
420 case offsetof(struct user, u_debugreg[7]): 415 case offsetof(struct user, u_debugreg[7]):
421 /* See arch/i386/kernel/ptrace.c for an explanation of 416 /* See arch/i386/kernel/ptrace.c for an explanation of
422 * this awkward check.*/ 417 * this awkward check.*/
423 data &= ~DR_CONTROL_RESERVED; 418 data &= ~DR_CONTROL_RESERVED;
424 for(i=0; i<4; i++) 419 for(i=0; i<4; i++)
425 if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) 420 if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
426 break; 421 break;
427 if (i == 4) { 422 if (i == 4) {
428 child->thread.debugreg7 = data; 423 child->thread.debugreg7 = data;
@@ -605,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
605 600
606 if (unlikely(current->audit_context)) { 601 if (unlikely(current->audit_context)) {
607 if (test_thread_flag(TIF_IA32)) { 602 if (test_thread_flag(TIF_IA32)) {
608 audit_syscall_entry(current, AUDIT_ARCH_I386, 603 audit_syscall_entry(AUDIT_ARCH_I386,
609 regs->orig_rax, 604 regs->orig_rax,
610 regs->rbx, regs->rcx, 605 regs->rbx, regs->rcx,
611 regs->rdx, regs->rsi); 606 regs->rdx, regs->rsi);
612 } else { 607 } else {
613 audit_syscall_entry(current, AUDIT_ARCH_X86_64, 608 audit_syscall_entry(AUDIT_ARCH_X86_64,
614 regs->orig_rax, 609 regs->orig_rax,
615 regs->rdi, regs->rsi, 610 regs->rdi, regs->rsi,
616 regs->rdx, regs->r10); 611 regs->rdx, regs->r10);
@@ -621,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
621asmlinkage void syscall_trace_leave(struct pt_regs *regs) 616asmlinkage void syscall_trace_leave(struct pt_regs *regs)
622{ 617{
623 if (unlikely(current->audit_context)) 618 if (unlikely(current->audit_context))
624 audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); 619 audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
625 620
626 if ((test_thread_flag(TIF_SYSCALL_TRACE) 621 if ((test_thread_flag(TIF_SYSCALL_TRACE)
627 || test_thread_flag(TIF_SINGLESTEP)) 622 || test_thread_flag(TIF_SINGLESTEP))
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 9435ab7d6fb8..655b9192eeb3 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -46,6 +46,7 @@
46#include <linux/cpufreq.h> 46#include <linux/cpufreq.h>
47#include <linux/dmi.h> 47#include <linux/dmi.h>
48#include <linux/dma-mapping.h> 48#include <linux/dma-mapping.h>
49#include <linux/ctype.h>
49 50
50#include <asm/mtrr.h> 51#include <asm/mtrr.h>
51#include <asm/uaccess.h> 52#include <asm/uaccess.h>
@@ -67,6 +68,7 @@
67#include <asm/swiotlb.h> 68#include <asm/swiotlb.h>
68#include <asm/sections.h> 69#include <asm/sections.h>
69#include <asm/gart-mapping.h> 70#include <asm/gart-mapping.h>
71#include <asm/dmi.h>
70 72
71/* 73/*
72 * Machine setup.. 74 * Machine setup..
@@ -91,6 +93,12 @@ int bootloader_type;
91 93
92unsigned long saved_video_mode; 94unsigned long saved_video_mode;
93 95
96/*
97 * Early DMI memory
98 */
99int dmi_alloc_index;
100char dmi_alloc_data[DMI_MAX_DATA];
101
94/* 102/*
95 * Setup options 103 * Setup options
96 */ 104 */
@@ -270,6 +278,13 @@ static void __init probe_roms(void)
270 } 278 }
271} 279}
272 280
281/* Check for full argument with no trailing characters */
282static int fullarg(char *p, char *arg)
283{
284 int l = strlen(arg);
285 return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
286}
287
273static __init void parse_cmdline_early (char ** cmdline_p) 288static __init void parse_cmdline_early (char ** cmdline_p)
274{ 289{
275 char c = ' ', *to = command_line, *from = COMMAND_LINE; 290 char c = ' ', *to = command_line, *from = COMMAND_LINE;
@@ -293,10 +308,10 @@ static __init void parse_cmdline_early (char ** cmdline_p)
293#endif 308#endif
294#ifdef CONFIG_ACPI 309#ifdef CONFIG_ACPI
295 /* "acpi=off" disables both ACPI table parsing and interpreter init */ 310 /* "acpi=off" disables both ACPI table parsing and interpreter init */
296 if (!memcmp(from, "acpi=off", 8)) 311 if (fullarg(from,"acpi=off"))
297 disable_acpi(); 312 disable_acpi();
298 313
299 if (!memcmp(from, "acpi=force", 10)) { 314 if (fullarg(from, "acpi=force")) {
300 /* add later when we do DMI horrors: */ 315 /* add later when we do DMI horrors: */
301 acpi_force = 1; 316 acpi_force = 1;
302 acpi_disabled = 0; 317 acpi_disabled = 0;
@@ -304,52 +319,49 @@ static __init void parse_cmdline_early (char ** cmdline_p)
304 319
305 /* acpi=ht just means: do ACPI MADT parsing 320 /* acpi=ht just means: do ACPI MADT parsing
306 at bootup, but don't enable the full ACPI interpreter */ 321 at bootup, but don't enable the full ACPI interpreter */
307 if (!memcmp(from, "acpi=ht", 7)) { 322 if (fullarg(from, "acpi=ht")) {
308 if (!acpi_force) 323 if (!acpi_force)
309 disable_acpi(); 324 disable_acpi();
310 acpi_ht = 1; 325 acpi_ht = 1;
311 } 326 }
312 else if (!memcmp(from, "pci=noacpi", 10)) 327 else if (fullarg(from, "pci=noacpi"))
313 acpi_disable_pci(); 328 acpi_disable_pci();
314 else if (!memcmp(from, "acpi=noirq", 10)) 329 else if (fullarg(from, "acpi=noirq"))
315 acpi_noirq_set(); 330 acpi_noirq_set();
316 331
317 else if (!memcmp(from, "acpi_sci=edge", 13)) 332 else if (fullarg(from, "acpi_sci=edge"))
318 acpi_sci_flags.trigger = 1; 333 acpi_sci_flags.trigger = 1;
319 else if (!memcmp(from, "acpi_sci=level", 14)) 334 else if (fullarg(from, "acpi_sci=level"))
320 acpi_sci_flags.trigger = 3; 335 acpi_sci_flags.trigger = 3;
321 else if (!memcmp(from, "acpi_sci=high", 13)) 336 else if (fullarg(from, "acpi_sci=high"))
322 acpi_sci_flags.polarity = 1; 337 acpi_sci_flags.polarity = 1;
323 else if (!memcmp(from, "acpi_sci=low", 12)) 338 else if (fullarg(from, "acpi_sci=low"))
324 acpi_sci_flags.polarity = 3; 339 acpi_sci_flags.polarity = 3;
325 340
326 /* acpi=strict disables out-of-spec workarounds */ 341 /* acpi=strict disables out-of-spec workarounds */
327 else if (!memcmp(from, "acpi=strict", 11)) { 342 else if (fullarg(from, "acpi=strict")) {
328 acpi_strict = 1; 343 acpi_strict = 1;
329 } 344 }
330#ifdef CONFIG_X86_IO_APIC 345#ifdef CONFIG_X86_IO_APIC
331 else if (!memcmp(from, "acpi_skip_timer_override", 24)) 346 else if (fullarg(from, "acpi_skip_timer_override"))
332 acpi_skip_timer_override = 1; 347 acpi_skip_timer_override = 1;
333#endif 348#endif
334#endif 349#endif
335 350
336 if (!memcmp(from, "disable_timer_pin_1", 19)) 351 if (fullarg(from, "disable_timer_pin_1"))
337 disable_timer_pin_1 = 1; 352 disable_timer_pin_1 = 1;
338 if (!memcmp(from, "enable_timer_pin_1", 18)) 353 if (fullarg(from, "enable_timer_pin_1"))
339 disable_timer_pin_1 = -1; 354 disable_timer_pin_1 = -1;
340 355
341 if (!memcmp(from, "nolapic", 7) || 356 if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
342 !memcmp(from, "disableapic", 11)) 357 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
343 disable_apic = 1; 358 disable_apic = 1;
359 }
344 360
345 /* Don't confuse with noapictimer */ 361 if (fullarg(from, "noapic"))
346 if (!memcmp(from, "noapic", 6) &&
347 (from[6] == ' ' || from[6] == 0))
348 skip_ioapic_setup = 1; 362 skip_ioapic_setup = 1;
349 363
350 /* Make sure to not confuse with apic= */ 364 if (fullarg(from,"apic")) {
351 if (!memcmp(from, "apic", 4) &&
352 (from[4] == ' ' || from[4] == 0)) {
353 skip_ioapic_setup = 0; 365 skip_ioapic_setup = 0;
354 ioapic_force = 1; 366 ioapic_force = 1;
355 } 367 }
@@ -388,7 +400,7 @@ static __init void parse_cmdline_early (char ** cmdline_p)
388 iommu_setup(from+6); 400 iommu_setup(from+6);
389 } 401 }
390 402
391 if (!memcmp(from,"oops=panic", 10)) 403 if (fullarg(from,"oops=panic"))
392 panic_on_oops = 1; 404 panic_on_oops = 1;
393 405
394 if (!memcmp(from, "noexec=", 7)) 406 if (!memcmp(from, "noexec=", 7))
@@ -423,6 +435,12 @@ static __init void parse_cmdline_early (char ** cmdline_p)
423 else if(!memcmp(from, "elfcorehdr=", 11)) 435 else if(!memcmp(from, "elfcorehdr=", 11))
424 elfcorehdr_addr = memparse(from+11, &from); 436 elfcorehdr_addr = memparse(from+11, &from);
425#endif 437#endif
438
439#ifdef CONFIG_HOTPLUG_CPU
440 else if (!memcmp(from, "additional_cpus=", 16))
441 setup_additional_cpus(from+16);
442#endif
443
426 next_char: 444 next_char:
427 c = *(from++); 445 c = *(from++);
428 if (!c) 446 if (!c)
@@ -524,7 +542,7 @@ void __init alternative_instructions(void)
524static int __init noreplacement_setup(char *s) 542static int __init noreplacement_setup(char *s)
525{ 543{
526 no_replacement = 1; 544 no_replacement = 1;
527 return 0; 545 return 1;
528} 546}
529 547
530__setup("noreplacement", noreplacement_setup); 548__setup("noreplacement", noreplacement_setup);
@@ -553,17 +571,28 @@ static inline void copy_edd(void)
553#endif 571#endif
554 572
555#define EBDA_ADDR_POINTER 0x40E 573#define EBDA_ADDR_POINTER 0x40E
556static void __init reserve_ebda_region(void) 574
575unsigned __initdata ebda_addr;
576unsigned __initdata ebda_size;
577
578static void discover_ebda(void)
557{ 579{
558 unsigned int addr; 580 /*
559 /**
560 * there is a real-mode segmented pointer pointing to the 581 * there is a real-mode segmented pointer pointing to the
561 * 4K EBDA area at 0x40E 582 * 4K EBDA area at 0x40E
562 */ 583 */
563 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); 584 ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
564 addr <<= 4; 585 ebda_addr <<= 4;
565 if (addr) 586
566 reserve_bootmem_generic(addr, PAGE_SIZE); 587 ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
588
589 /* Round EBDA up to pages */
590 if (ebda_size == 0)
591 ebda_size = 1;
592 ebda_size <<= 10;
593 ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
594 if (ebda_size > 64*1024)
595 ebda_size = 64*1024;
567} 596}
568 597
569void __init setup_arch(char **cmdline_p) 598void __init setup_arch(char **cmdline_p)
@@ -605,11 +634,16 @@ void __init setup_arch(char **cmdline_p)
605 * we are rounding upwards: 634 * we are rounding upwards:
606 */ 635 */
607 end_pfn = e820_end_of_ram(); 636 end_pfn = e820_end_of_ram();
637 num_physpages = end_pfn; /* for pfn_valid */
608 638
609 check_efer(); 639 check_efer();
610 640
641 discover_ebda();
642
611 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 643 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
612 644
645 dmi_scan_machine();
646
613 zap_low_mappings(0); 647 zap_low_mappings(0);
614 648
615#ifdef CONFIG_ACPI 649#ifdef CONFIG_ACPI
@@ -648,7 +682,8 @@ void __init setup_arch(char **cmdline_p)
648 reserve_bootmem_generic(0, PAGE_SIZE); 682 reserve_bootmem_generic(0, PAGE_SIZE);
649 683
650 /* reserve ebda region */ 684 /* reserve ebda region */
651 reserve_ebda_region(); 685 if (ebda_addr)
686 reserve_bootmem_generic(ebda_addr, ebda_size);
652 687
653#ifdef CONFIG_SMP 688#ifdef CONFIG_SMP
654 /* 689 /*
@@ -702,6 +737,12 @@ void __init setup_arch(char **cmdline_p)
702 737
703 check_ioapic(); 738 check_ioapic();
704 739
740 /*
741 * set this early, so we dont allocate cpu0
742 * if MADT list doesnt list BSP first
743 * mpparse.c/MP_processor_info() allocates logical cpu numbers.
744 */
745 cpu_set(0, cpu_present_map);
705#ifdef CONFIG_ACPI 746#ifdef CONFIG_ACPI
706 /* 747 /*
707 * Read APIC and some other early information from ACPI tables. 748 * Read APIC and some other early information from ACPI tables.
@@ -830,7 +871,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
830 unsigned bits; 871 unsigned bits;
831#ifdef CONFIG_NUMA 872#ifdef CONFIG_NUMA
832 int node = 0; 873 int node = 0;
833 unsigned apicid = phys_proc_id[cpu]; 874 unsigned apicid = hard_smp_processor_id();
834#endif 875#endif
835 876
836 bits = 0; 877 bits = 0;
@@ -840,7 +881,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
840 /* Low order bits define the core id (index of core in socket) */ 881 /* Low order bits define the core id (index of core in socket) */
841 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); 882 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
842 /* Convert the APIC ID into the socket ID */ 883 /* Convert the APIC ID into the socket ID */
843 phys_proc_id[cpu] >>= bits; 884 phys_proc_id[cpu] = phys_pkg_id(bits);
844 885
845#ifdef CONFIG_NUMA 886#ifdef CONFIG_NUMA
846 node = phys_proc_id[cpu]; 887 node = phys_proc_id[cpu];
@@ -866,8 +907,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
866 } 907 }
867 numa_set_node(cpu, node); 908 numa_set_node(cpu, node);
868 909
869 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 910 printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
870 cpu, c->x86_max_cores, node, cpu_core_id[cpu]); 911 cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
871#endif 912#endif
872#endif 913#endif
873} 914}
@@ -903,6 +944,10 @@ static int __init init_amd(struct cpuinfo_x86 *c)
903 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) 944 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
904 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); 945 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
905 946
947 /* Enable workaround for FXSAVE leak */
948 if (c->x86 >= 6)
949 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
950
906 r = get_model_name(c); 951 r = get_model_name(c);
907 if (!r) { 952 if (!r) {
908 switch (c->x86) { 953 switch (c->x86) {
@@ -921,8 +966,6 @@ static int __init init_amd(struct cpuinfo_x86 *c)
921 966
922 if (c->extended_cpuid_level >= 0x80000008) { 967 if (c->extended_cpuid_level >= 0x80000008) {
923 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 968 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
924 if (c->x86_max_cores & (c->x86_max_cores - 1))
925 c->x86_max_cores = 1;
926 969
927 amd_detect_cmp(c); 970 amd_detect_cmp(c);
928 } 971 }
@@ -939,7 +982,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
939 982
940 cpuid(1, &eax, &ebx, &ecx, &edx); 983 cpuid(1, &eax, &ebx, &ecx, &edx);
941 984
942 c->apicid = phys_pkg_id(0);
943 985
944 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 986 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
945 return; 987 return;
@@ -1009,7 +1051,7 @@ static void srat_detect_node(void)
1009 for now. */ 1051 for now. */
1010 node = apicid_to_node[hard_smp_processor_id()]; 1052 node = apicid_to_node[hard_smp_processor_id()];
1011 if (node == NUMA_NO_NODE) 1053 if (node == NUMA_NO_NODE)
1012 node = 0; 1054 node = first_node(node_online_map);
1013 numa_set_node(cpu, node); 1055 numa_set_node(cpu, node);
1014 1056
1015 if (acpi_numa > 0) 1057 if (acpi_numa > 0)
@@ -1148,6 +1190,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1148 c->x86_capability[2] = cpuid_edx(0x80860001); 1190 c->x86_capability[2] = cpuid_edx(0x80860001);
1149 } 1191 }
1150 1192
1193 c->apicid = phys_pkg_id(0);
1194
1151 /* 1195 /*
1152 * Vendor-specific initialization. In this section we 1196 * Vendor-specific initialization. In this section we
1153 * canonicalize the feature flags, meaning if there are 1197 * canonicalize the feature flags, meaning if there are
@@ -1255,7 +1299,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1255 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1299 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1256 1300
1257 /* Intel-defined (#2) */ 1301 /* Intel-defined (#2) */
1258 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", 1302 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
1259 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, 1303 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1260 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1304 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1261 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1305 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1338,8 +1382,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1338 { 1382 {
1339 int i; 1383 int i;
1340 for ( i = 0 ; i < 32*NCAPINTS ; i++ ) 1384 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1341 if ( test_bit(i, &c->x86_capability) && 1385 if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
1342 x86_cap_flags[i] != NULL )
1343 seq_printf(m, " %s", x86_cap_flags[i]); 1386 seq_printf(m, " %s", x86_cap_flags[i]);
1344 } 1387 }
1345 1388
@@ -1397,10 +1440,22 @@ struct seq_operations cpuinfo_op = {
1397 .show = show_cpuinfo, 1440 .show = show_cpuinfo,
1398}; 1441};
1399 1442
1400static int __init run_dmi_scan(void) 1443#ifdef CONFIG_INPUT_PCSPKR
1444#include <linux/platform_device.h>
1445static __init int add_pcspkr(void)
1401{ 1446{
1402 dmi_scan_machine(); 1447 struct platform_device *pd;
1403 return 0; 1448 int ret;
1404} 1449
1405core_initcall(run_dmi_scan); 1450 pd = platform_device_alloc("pcspkr", -1);
1451 if (!pd)
1452 return -ENOMEM;
1406 1453
1454 ret = platform_device_add(pd);
1455 if (ret)
1456 platform_device_put(pd);
1457
1458 return ret;
1459}
1460device_initcall(add_pcspkr);
1461#endif
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 70f1bb808a20..8a691fa6d393 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -33,7 +33,7 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
33struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; 33struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; 34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
35 35
36struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; 36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
37 37
38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); 38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
39 39
@@ -55,11 +55,11 @@ int __init nonx_setup(char *str)
55 do_not_nx = 1; 55 do_not_nx = 1;
56 __supported_pte_mask &= ~_PAGE_NX; 56 __supported_pte_mask &= ~_PAGE_NX;
57 } 57 }
58 return 0; 58 return 1;
59} 59}
60__setup("noexec=", nonx_setup); /* parsed early actually */ 60__setup("noexec=", nonx_setup); /* parsed early actually */
61 61
62int force_personality32 = READ_IMPLIES_EXEC; 62int force_personality32 = 0;
63 63
64/* noexec32=on|off 64/* noexec32=on|off
65Control non executable heap for 32bit processes. 65Control non executable heap for 32bit processes.
@@ -74,7 +74,7 @@ static int __init nonx32_setup(char *str)
74 force_personality32 &= ~READ_IMPLIES_EXEC; 74 force_personality32 &= ~READ_IMPLIES_EXEC;
75 else if (!strcmp(str, "off")) 75 else if (!strcmp(str, "off"))
76 force_personality32 |= READ_IMPLIES_EXEC; 76 force_personality32 |= READ_IMPLIES_EXEC;
77 return 0; 77 return 1;
78} 78}
79__setup("noexec32=", nonx32_setup); 79__setup("noexec32=", nonx32_setup);
80 80
@@ -248,7 +248,7 @@ void __cpuinit cpu_init (void)
248 switch (v + 1) { 248 switch (v + 1) {
249#if DEBUG_STKSZ > EXCEPTION_STKSZ 249#if DEBUG_STKSZ > EXCEPTION_STKSZ
250 case DEBUG_STACK: 250 case DEBUG_STACK:
251 cpu_pda[cpu].debugstack = (unsigned long)estacks; 251 cpu_pda(cpu)->debugstack = (unsigned long)estacks;
252 estacks += DEBUG_STKSZ; 252 estacks += DEBUG_STKSZ;
253 break; 253 break;
254#endif 254#endif
@@ -281,12 +281,12 @@ void __cpuinit cpu_init (void)
281 * Clear all 6 debug registers: 281 * Clear all 6 debug registers:
282 */ 282 */
283 283
284 set_debug(0UL, 0); 284 set_debugreg(0UL, 0);
285 set_debug(0UL, 1); 285 set_debugreg(0UL, 1);
286 set_debug(0UL, 2); 286 set_debugreg(0UL, 2);
287 set_debug(0UL, 3); 287 set_debugreg(0UL, 3);
288 set_debug(0UL, 6); 288 set_debugreg(0UL, 6);
289 set_debug(0UL, 7); 289 set_debugreg(0UL, 7);
290 290
291 fpu_init(); 291 fpu_init();
292} 292}
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 5876df116c92..e5f5ce7909a3 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -443,9 +443,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
443 if (!user_mode(regs)) 443 if (!user_mode(regs))
444 return 1; 444 return 1;
445 445
446 if (try_to_freeze())
447 goto no_signal;
448
449 if (!oldset) 446 if (!oldset)
450 oldset = &current->blocked; 447 oldset = &current->blocked;
451 448
@@ -463,7 +460,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
463 return handle_signal(signr, &info, &ka, oldset, regs); 460 return handle_signal(signr, &info, &ka, oldset, regs);
464 } 461 }
465 462
466 no_signal:
467 /* Did we come from a system call? */ 463 /* Did we come from a system call? */
468 if ((long)regs->orig_rax >= 0) { 464 if ((long)regs->orig_rax >= 0) {
469 /* Restart the system call - no handlers present */ 465 /* Restart the system call - no handlers present */
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 19ef012b1f17..4a6628b14d99 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -75,7 +75,7 @@ static inline void leave_mm(int cpu)
75{ 75{
76 if (read_pda(mmu_state) == TLBSTATE_OK) 76 if (read_pda(mmu_state) == TLBSTATE_OK)
77 BUG(); 77 BUG();
78 clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask); 78 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
79 load_cr3(swapper_pg_dir); 79 load_cr3(swapper_pg_dir);
80} 80}
81 81
@@ -85,7 +85,7 @@ static inline void leave_mm(int cpu)
85 * [cpu0: the cpu that switches] 85 * [cpu0: the cpu that switches]
86 * 1) switch_mm() either 1a) or 1b) 86 * 1) switch_mm() either 1a) or 1b)
87 * 1a) thread switch to a different mm 87 * 1a) thread switch to a different mm
88 * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask); 88 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
89 * Stop ipi delivery for the old mm. This is not synchronized with 89 * Stop ipi delivery for the old mm. This is not synchronized with
90 * the other cpus, but smp_invalidate_interrupt ignore flush ipis 90 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
91 * for the wrong mm, and in the worst case we perform a superfluous 91 * for the wrong mm, and in the worst case we perform a superfluous
@@ -95,7 +95,7 @@ static inline void leave_mm(int cpu)
95 * was in lazy tlb mode. 95 * was in lazy tlb mode.
96 * 1a3) update cpu active_mm 96 * 1a3) update cpu active_mm
97 * Now cpu0 accepts tlb flushes for the new mm. 97 * Now cpu0 accepts tlb flushes for the new mm.
98 * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask); 98 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
99 * Now the other cpus will send tlb flush ipis. 99 * Now the other cpus will send tlb flush ipis.
100 * 1a4) change cr3. 100 * 1a4) change cr3.
101 * 1b) thread switch without mm change 101 * 1b) thread switch without mm change
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 67e4e28f4df8..71a7222cf9ce 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68/* core ID of each logical CPU */ 68/* core ID of each logical CPU */
69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
70 70
71/* Last level cache ID of each logical CPU */
72u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
73
71/* Bitmask of currently online CPUs */ 74/* Bitmask of currently online CPUs */
72cpumask_t cpu_online_map __read_mostly; 75cpumask_t cpu_online_map __read_mostly;
73 76
@@ -350,7 +353,7 @@ static void __cpuinit tsc_sync_wait(void)
350static __init int notscsync_setup(char *s) 353static __init int notscsync_setup(char *s)
351{ 354{
352 notscsync = 1; 355 notscsync = 1;
353 return 0; 356 return 1;
354} 357}
355__setup("notscsync", notscsync_setup); 358__setup("notscsync", notscsync_setup);
356 359
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void)
445 cpu_set(cpuid, cpu_callin_map); 448 cpu_set(cpuid, cpu_callin_map);
446} 449}
447 450
451/* maps the cpu to the sched domain representing multi-core */
452cpumask_t cpu_coregroup_map(int cpu)
453{
454 struct cpuinfo_x86 *c = cpu_data + cpu;
455 /*
456 * For perf, we return last level cache shared map.
457 * TBD: when power saving sched policy is added, we will return
458 * cpu_core_map when power saving policy is enabled
459 */
460 return c->llc_shared_map;
461}
462
448/* representing cpus for which sibling maps can be computed */ 463/* representing cpus for which sibling maps can be computed */
449static cpumask_t cpu_sibling_setup_map; 464static cpumask_t cpu_sibling_setup_map;
450 465
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu)
463 cpu_set(cpu, cpu_sibling_map[i]); 478 cpu_set(cpu, cpu_sibling_map[i]);
464 cpu_set(i, cpu_core_map[cpu]); 479 cpu_set(i, cpu_core_map[cpu]);
465 cpu_set(cpu, cpu_core_map[i]); 480 cpu_set(cpu, cpu_core_map[i]);
481 cpu_set(i, c[cpu].llc_shared_map);
482 cpu_set(cpu, c[i].llc_shared_map);
466 } 483 }
467 } 484 }
468 } else { 485 } else {
469 cpu_set(cpu, cpu_sibling_map[cpu]); 486 cpu_set(cpu, cpu_sibling_map[cpu]);
470 } 487 }
471 488
489 cpu_set(cpu, c[cpu].llc_shared_map);
490
472 if (current_cpu_data.x86_max_cores == 1) { 491 if (current_cpu_data.x86_max_cores == 1) {
473 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 492 cpu_core_map[cpu] = cpu_sibling_map[cpu];
474 c[cpu].booted_cores = 1; 493 c[cpu].booted_cores = 1;
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu)
476 } 495 }
477 496
478 for_each_cpu_mask(i, cpu_sibling_setup_map) { 497 for_each_cpu_mask(i, cpu_sibling_setup_map) {
498 if (cpu_llc_id[cpu] != BAD_APICID &&
499 cpu_llc_id[cpu] == cpu_llc_id[i]) {
500 cpu_set(i, c[cpu].llc_shared_map);
501 cpu_set(cpu, c[i].llc_shared_map);
502 }
479 if (phys_proc_id[cpu] == phys_proc_id[i]) { 503 if (phys_proc_id[cpu] == phys_proc_id[i]) {
480 cpu_set(i, cpu_core_map[cpu]); 504 cpu_set(i, cpu_core_map[cpu]);
481 cpu_set(cpu, cpu_core_map[i]); 505 cpu_set(cpu, cpu_core_map[i]);
@@ -1152,8 +1176,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
1152 setup_ioapic_dest(); 1176 setup_ioapic_dest();
1153#endif 1177#endif
1154 1178
1155 time_init_gtod();
1156
1157 check_nmi_watchdog(); 1179 check_nmi_watchdog();
1158} 1180}
1159 1181
@@ -1244,7 +1266,7 @@ void __cpu_die(unsigned int cpu)
1244 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1266 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1245} 1267}
1246 1268
1247static __init int setup_additional_cpus(char *s) 1269__init int setup_additional_cpus(char *s)
1248{ 1270{
1249 return get_option(&s, &additional_cpus); 1271 return get_option(&s, &additional_cpus);
1250} 1272}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index dba7237be5c1..7392570f975d 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -48,6 +48,8 @@ static void cpufreq_delayed_get(void);
48extern void i8254_timer_resume(void); 48extern void i8254_timer_resume(void);
49extern int using_apic_timer; 49extern int using_apic_timer;
50 50
51static char *time_init_gtod(void);
52
51DEFINE_SPINLOCK(rtc_lock); 53DEFINE_SPINLOCK(rtc_lock);
52DEFINE_SPINLOCK(i8253_lock); 54DEFINE_SPINLOCK(i8253_lock);
53 55
@@ -59,7 +61,7 @@ static int notsc __initdata = 0;
59unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 61unsigned int cpu_khz; /* TSC clocks / usec, not used here */
60static unsigned long hpet_period; /* fsecs / HPET clock */ 62static unsigned long hpet_period; /* fsecs / HPET clock */
61unsigned long hpet_tick; /* HPET clocks / interrupt */ 63unsigned long hpet_tick; /* HPET clocks / interrupt */
62static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ 64int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
63unsigned long vxtime_hz = PIT_TICK_RATE; 65unsigned long vxtime_hz = PIT_TICK_RATE;
64int report_lost_ticks; /* command line option */ 66int report_lost_ticks; /* command line option */
65unsigned long long monotonic_base; 67unsigned long long monotonic_base;
@@ -86,7 +88,8 @@ static inline unsigned int do_gettimeoffset_tsc(void)
86 unsigned long t; 88 unsigned long t;
87 unsigned long x; 89 unsigned long x;
88 t = get_cycles_sync(); 90 t = get_cycles_sync();
89 if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ 91 if (t < vxtime.last_tsc)
92 t = vxtime.last_tsc; /* hack */
90 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; 93 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
91 return x; 94 return x;
92} 95}
@@ -176,8 +179,9 @@ unsigned long profile_pc(struct pt_regs *regs)
176{ 179{
177 unsigned long pc = instruction_pointer(regs); 180 unsigned long pc = instruction_pointer(regs);
178 181
179 /* Assume the lock function has either no stack frame or only a single word. 182 /* Assume the lock function has either no stack frame or only a single
180 This checks if the address on the stack looks like a kernel text address. 183 word. This checks if the address on the stack looks like a kernel
184 text address.
181 There is a small window for false hits, but in that case the tick 185 There is a small window for false hits, but in that case the tick
182 is just accounted to the spinlock function. 186 is just accounted to the spinlock function.
183 Better would be to write these functions in assembler again 187 Better would be to write these functions in assembler again
@@ -240,17 +244,10 @@ static void set_rtc_mmss(unsigned long nowtime)
240 real_minutes += 30; /* correct for half hour time zone */ 244 real_minutes += 30; /* correct for half hour time zone */
241 real_minutes %= 60; 245 real_minutes %= 60;
242 246
243#if 0
244 /* AMD 8111 is a really bad time keeper and hits this regularly.
245 It probably was an attempt to avoid screwing up DST, but ignore
246 that for now. */
247 if (abs(real_minutes - cmos_minutes) >= 30) { 247 if (abs(real_minutes - cmos_minutes) >= 30) {
248 printk(KERN_WARNING "time.c: can't update CMOS clock " 248 printk(KERN_WARNING "time.c: can't update CMOS clock "
249 "from %d to %d\n", cmos_minutes, real_minutes); 249 "from %d to %d\n", cmos_minutes, real_minutes);
250 } else 250 } else {
251#endif
252
253 {
254 BIN_TO_BCD(real_seconds); 251 BIN_TO_BCD(real_seconds);
255 BIN_TO_BCD(real_minutes); 252 BIN_TO_BCD(real_minutes);
256 CMOS_WRITE(real_seconds, RTC_SECONDS); 253 CMOS_WRITE(real_seconds, RTC_SECONDS);
@@ -291,8 +288,7 @@ unsigned long long monotonic_clock(void)
291 this_offset = hpet_readl(HPET_COUNTER); 288 this_offset = hpet_readl(HPET_COUNTER);
292 } while (read_seqretry(&xtime_lock, seq)); 289 } while (read_seqretry(&xtime_lock, seq));
293 offset = (this_offset - last_offset); 290 offset = (this_offset - last_offset);
294 offset *=(NSEC_PER_SEC/HZ)/hpet_tick; 291 offset *= (NSEC_PER_SEC/HZ) / hpet_tick;
295 return base + offset;
296 } else { 292 } else {
297 do { 293 do {
298 seq = read_seqbegin(&xtime_lock); 294 seq = read_seqbegin(&xtime_lock);
@@ -301,47 +297,46 @@ unsigned long long monotonic_clock(void)
301 base = monotonic_base; 297 base = monotonic_base;
302 } while (read_seqretry(&xtime_lock, seq)); 298 } while (read_seqretry(&xtime_lock, seq));
303 this_offset = get_cycles_sync(); 299 this_offset = get_cycles_sync();
304 offset = (this_offset - last_offset)*1000/cpu_khz; 300 offset = (this_offset - last_offset)*1000 / cpu_khz;
305 return base + offset;
306 } 301 }
302 return base + offset;
307} 303}
308EXPORT_SYMBOL(monotonic_clock); 304EXPORT_SYMBOL(monotonic_clock);
309 305
310static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) 306static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
311{ 307{
312 static long lost_count; 308 static long lost_count;
313 static int warned; 309 static int warned;
314 310 if (report_lost_ticks) {
315 if (report_lost_ticks) { 311 printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
316 printk(KERN_WARNING "time.c: Lost %d timer " 312 print_symbol("rip %s)\n", regs->rip);
317 "tick(s)! ", lost); 313 }
318 print_symbol("rip %s)\n", regs->rip); 314
319 } 315 if (lost_count == 1000 && !warned) {
320 316 printk(KERN_WARNING "warning: many lost ticks.\n"
321 if (lost_count == 1000 && !warned) { 317 KERN_WARNING "Your time source seems to be instable or "
322 printk(KERN_WARNING
323 "warning: many lost ticks.\n"
324 KERN_WARNING "Your time source seems to be instable or "
325 "some driver is hogging interupts\n"); 318 "some driver is hogging interupts\n");
326 print_symbol("rip %s\n", regs->rip); 319 print_symbol("rip %s\n", regs->rip);
327 if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { 320 if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
328 printk(KERN_WARNING "Falling back to HPET\n"); 321 printk(KERN_WARNING "Falling back to HPET\n");
329 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 322 if (hpet_use_timer)
330 vxtime.mode = VXTIME_HPET; 323 vxtime.last = hpet_readl(HPET_T0_CMP) -
331 do_gettimeoffset = do_gettimeoffset_hpet; 324 hpet_tick;
332 } 325 else
333 /* else should fall back to PIT, but code missing. */ 326 vxtime.last = hpet_readl(HPET_COUNTER);
334 warned = 1; 327 vxtime.mode = VXTIME_HPET;
335 } else 328 do_gettimeoffset = do_gettimeoffset_hpet;
336 lost_count++; 329 }
330 /* else should fall back to PIT, but code missing. */
331 warned = 1;
332 } else
333 lost_count++;
337 334
338#ifdef CONFIG_CPU_FREQ 335#ifdef CONFIG_CPU_FREQ
339 /* In some cases the CPU can change frequency without us noticing 336 /* In some cases the CPU can change frequency without us noticing
340 (like going into thermal throttle) 337 Give cpufreq a change to catch up. */
341 Give cpufreq a change to catch up. */ 338 if ((lost_count+1) % 25 == 0)
342 if ((lost_count+1) % 25 == 0) { 339 cpufreq_delayed_get();
343 cpufreq_delayed_get();
344 }
345#endif 340#endif
346} 341}
347 342
@@ -349,7 +344,7 @@ void main_timer_handler(struct pt_regs *regs)
349{ 344{
350 static unsigned long rtc_update = 0; 345 static unsigned long rtc_update = 0;
351 unsigned long tsc; 346 unsigned long tsc;
352 int delay, offset = 0, lost = 0; 347 int delay = 0, offset = 0, lost = 0;
353 348
354/* 349/*
355 * Here we are in the timer irq handler. We have irqs locally disabled (so we 350 * Here we are in the timer irq handler. We have irqs locally disabled (so we
@@ -370,7 +365,7 @@ void main_timer_handler(struct pt_regs *regs)
370 */ 365 */
371 offset = hpet_readl(HPET_T0_CMP) - hpet_tick; 366 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
372 delay = hpet_readl(HPET_COUNTER) - offset; 367 delay = hpet_readl(HPET_COUNTER) - offset;
373 } else { 368 } else if (!pmtmr_ioport) {
374 spin_lock(&i8253_lock); 369 spin_lock(&i8253_lock);
375 outb_p(0x00, 0x43); 370 outb_p(0x00, 0x43);
376 delay = inb_p(0x40); 371 delay = inb_p(0x40);
@@ -472,7 +467,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
472 return IRQ_HANDLED; 467 return IRQ_HANDLED;
473} 468}
474 469
475static unsigned int cyc2ns_scale; 470static unsigned int cyc2ns_scale __read_mostly;
476#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 471#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
477 472
478static inline void set_cyc2ns_scale(unsigned long cpu_khz) 473static inline void set_cyc2ns_scale(unsigned long cpu_khz)
@@ -509,43 +504,32 @@ unsigned long long sched_clock(void)
509 504
510static unsigned long get_cmos_time(void) 505static unsigned long get_cmos_time(void)
511{ 506{
512 unsigned int timeout = 1000000, year, mon, day, hour, min, sec; 507 unsigned int year, mon, day, hour, min, sec;
513 unsigned char uip = 0, this = 0;
514 unsigned long flags; 508 unsigned long flags;
515 509 unsigned extyear = 0;
516/*
517 * The Linux interpretation of the CMOS clock register contents: When the
518 * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the
519 * second which has precisely just started. Waiting for this can take up to 1
520 * second, we timeout approximately after 2.4 seconds on a machine with
521 * standard 8.3 MHz ISA bus.
522 */
523 510
524 spin_lock_irqsave(&rtc_lock, flags); 511 spin_lock_irqsave(&rtc_lock, flags);
525 512
526 while (timeout && (!uip || this)) { 513 do {
527 uip |= this; 514 sec = CMOS_READ(RTC_SECONDS);
528 this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; 515 min = CMOS_READ(RTC_MINUTES);
529 timeout--; 516 hour = CMOS_READ(RTC_HOURS);
530 } 517 day = CMOS_READ(RTC_DAY_OF_MONTH);
531 518 mon = CMOS_READ(RTC_MONTH);
532 /* 519 year = CMOS_READ(RTC_YEAR);
533 * Here we are safe to assume the registers won't change for a whole 520#ifdef CONFIG_ACPI
534 * second, so we just go ahead and read them. 521 if (acpi_fadt.revision >= FADT2_REVISION_ID &&
535 */ 522 acpi_fadt.century)
536 sec = CMOS_READ(RTC_SECONDS); 523 extyear = CMOS_READ(acpi_fadt.century);
537 min = CMOS_READ(RTC_MINUTES); 524#endif
538 hour = CMOS_READ(RTC_HOURS); 525 } while (sec != CMOS_READ(RTC_SECONDS));
539 day = CMOS_READ(RTC_DAY_OF_MONTH);
540 mon = CMOS_READ(RTC_MONTH);
541 year = CMOS_READ(RTC_YEAR);
542 526
543 spin_unlock_irqrestore(&rtc_lock, flags); 527 spin_unlock_irqrestore(&rtc_lock, flags);
544 528
545 /* 529 /*
546 * We know that x86-64 always uses BCD format, no need to check the 530 * We know that x86-64 always uses BCD format, no need to check the
547 * config register. 531 * config register.
548 */ 532 */
549 533
550 BCD_TO_BIN(sec); 534 BCD_TO_BIN(sec);
551 BCD_TO_BIN(min); 535 BCD_TO_BIN(min);
@@ -554,11 +538,17 @@ static unsigned long get_cmos_time(void)
554 BCD_TO_BIN(mon); 538 BCD_TO_BIN(mon);
555 BCD_TO_BIN(year); 539 BCD_TO_BIN(year);
556 540
557 /* 541 if (extyear) {
558 * x86-64 systems only exists since 2002. 542 BCD_TO_BIN(extyear);
559 * This will work up to Dec 31, 2100 543 year += extyear;
560 */ 544 printk(KERN_INFO "Extended CMOS year: %d\n", extyear);
561 year += 2000; 545 } else {
546 /*
547 * x86-64 systems only exists since 2002.
548 * This will work up to Dec 31, 2100
549 */
550 year += 2000;
551 }
562 552
563 return mktime(year, mon, day, hour, min, sec); 553 return mktime(year, mon, day, hour, min, sec);
564} 554}
@@ -601,7 +591,8 @@ static void cpufreq_delayed_get(void)
601 cpufreq_delayed_issched = 1; 591 cpufreq_delayed_issched = 1;
602 if (!warned) { 592 if (!warned) {
603 warned = 1; 593 warned = 1;
604 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); 594 printk(KERN_DEBUG
595 "Losing some ticks... checking if CPU frequency changed.\n");
605 } 596 }
606 schedule_work(&cpufreq_delayed_get_work); 597 schedule_work(&cpufreq_delayed_get_work);
607 } 598 }
@@ -624,9 +615,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
624 lpj = &dummy; 615 lpj = &dummy;
625 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 616 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
626#ifdef CONFIG_SMP 617#ifdef CONFIG_SMP
627 lpj = &cpu_data[freq->cpu].loops_per_jiffy; 618 lpj = &cpu_data[freq->cpu].loops_per_jiffy;
628#else 619#else
629 lpj = &boot_cpu_data.loops_per_jiffy; 620 lpj = &boot_cpu_data.loops_per_jiffy;
630#endif 621#endif
631 622
632 if (!ref_freq) { 623 if (!ref_freq) {
@@ -735,7 +726,7 @@ static __init int late_hpet_init(void)
735 unsigned int ntimer; 726 unsigned int ntimer;
736 727
737 if (!vxtime.hpet_address) 728 if (!vxtime.hpet_address)
738 return -1; 729 return 0;
739 730
740 memset(&hd, 0, sizeof (hd)); 731 memset(&hd, 0, sizeof (hd));
741 732
@@ -763,9 +754,8 @@ static __init int late_hpet_init(void)
763 int i; 754 int i;
764 755
765 hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); 756 hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
766 757 timer = &hpet->hpet_timers[2];
767 for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; 758 for (i = 2; i < ntimer; timer++, i++)
768 timer++, i++)
769 hd.hd_irq[i] = (timer->hpet_config & 759 hd.hd_irq[i] = (timer->hpet_config &
770 Tn_INT_ROUTE_CNF_MASK) >> 760 Tn_INT_ROUTE_CNF_MASK) >>
771 Tn_INT_ROUTE_CNF_SHIFT; 761 Tn_INT_ROUTE_CNF_SHIFT;
@@ -898,6 +888,7 @@ static struct irqaction irq0 = {
898void __init time_init(void) 888void __init time_init(void)
899{ 889{
900 char *timename; 890 char *timename;
891 char *gtod;
901 892
902#ifdef HPET_HACK_ENABLE_DANGEROUS 893#ifdef HPET_HACK_ENABLE_DANGEROUS
903 if (!vxtime.hpet_address) { 894 if (!vxtime.hpet_address) {
@@ -921,12 +912,13 @@ void __init time_init(void)
921 -xtime.tv_sec, -xtime.tv_nsec); 912 -xtime.tv_sec, -xtime.tv_nsec);
922 913
923 if (!hpet_init()) 914 if (!hpet_init())
924 vxtime_hz = (1000000000000000L + hpet_period / 2) / 915 vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period;
925 hpet_period;
926 else 916 else
927 vxtime.hpet_address = 0; 917 vxtime.hpet_address = 0;
928 918
929 if (hpet_use_timer) { 919 if (hpet_use_timer) {
920 /* set tick_nsec to use the proper rate for HPET */
921 tick_nsec = TICK_NSEC_HPET;
930 cpu_khz = hpet_calibrate_tsc(); 922 cpu_khz = hpet_calibrate_tsc();
931 timename = "HPET"; 923 timename = "HPET";
932#ifdef CONFIG_X86_PM_TIMER 924#ifdef CONFIG_X86_PM_TIMER
@@ -942,21 +934,19 @@ void __init time_init(void)
942 timename = "PIT"; 934 timename = "PIT";
943 } 935 }
944 936
945 printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", 937 vxtime.mode = VXTIME_TSC;
946 vxtime_hz / 1000000, vxtime_hz % 1000000, timename); 938 gtod = time_init_gtod();
939
940 printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
941 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
947 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 942 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
948 cpu_khz / 1000, cpu_khz % 1000); 943 cpu_khz / 1000, cpu_khz % 1000);
949 vxtime.mode = VXTIME_TSC;
950 vxtime.quot = (1000000L << 32) / vxtime_hz; 944 vxtime.quot = (1000000L << 32) / vxtime_hz;
951 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 945 vxtime.tsc_quot = (1000L << 32) / cpu_khz;
952 vxtime.last_tsc = get_cycles_sync(); 946 vxtime.last_tsc = get_cycles_sync();
953 setup_irq(0, &irq0); 947 setup_irq(0, &irq0);
954 948
955 set_cyc2ns_scale(cpu_khz); 949 set_cyc2ns_scale(cpu_khz);
956
957#ifndef CONFIG_SMP
958 time_init_gtod();
959#endif
960} 950}
961 951
962/* 952/*
@@ -978,9 +968,9 @@ __cpuinit int unsynchronized_tsc(void)
978} 968}
979 969
980/* 970/*
981 * Decide after all CPUs are booted what mode gettimeofday should use. 971 * Decide what mode gettimeofday should use.
982 */ 972 */
983void __init time_init_gtod(void) 973__init static char *time_init_gtod(void)
984{ 974{
985 char *timetype; 975 char *timetype;
986 976
@@ -988,7 +978,10 @@ void __init time_init_gtod(void)
988 notsc = 1; 978 notsc = 1;
989 if (vxtime.hpet_address && notsc) { 979 if (vxtime.hpet_address && notsc) {
990 timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; 980 timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
991 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 981 if (hpet_use_timer)
982 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
983 else
984 vxtime.last = hpet_readl(HPET_COUNTER);
992 vxtime.mode = VXTIME_HPET; 985 vxtime.mode = VXTIME_HPET;
993 do_gettimeoffset = do_gettimeoffset_hpet; 986 do_gettimeoffset = do_gettimeoffset_hpet;
994#ifdef CONFIG_X86_PM_TIMER 987#ifdef CONFIG_X86_PM_TIMER
@@ -1005,8 +998,7 @@ void __init time_init_gtod(void)
1005 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; 998 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
1006 vxtime.mode = VXTIME_TSC; 999 vxtime.mode = VXTIME_TSC;
1007 } 1000 }
1008 1001 return timetype;
1009 printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
1010} 1002}
1011 1003
1012__setup("report_lost_ticks", time_setup); 1004__setup("report_lost_ticks", time_setup);
@@ -1316,16 +1308,15 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
1316static int __init nohpet_setup(char *s) 1308static int __init nohpet_setup(char *s)
1317{ 1309{
1318 nohpet = 1; 1310 nohpet = 1;
1319 return 0; 1311 return 1;
1320} 1312}
1321 1313
1322__setup("nohpet", nohpet_setup); 1314__setup("nohpet", nohpet_setup);
1323 1315
1324 1316int __init notsc_setup(char *s)
1325static int __init notsc_setup(char *s)
1326{ 1317{
1327 notsc = 1; 1318 notsc = 1;
1328 return 0; 1319 return 1;
1329} 1320}
1330 1321
1331__setup("notsc", notsc_setup); 1322__setup("notsc", notsc_setup);
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index ee1b2da9e5e7..cea335e8746c 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,6 +30,7 @@
30#include <linux/moduleparam.h> 30#include <linux/moduleparam.h>
31#include <linux/nmi.h> 31#include <linux/nmi.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/kexec.h>
33 34
34#include <asm/system.h> 35#include <asm/system.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
@@ -47,8 +48,6 @@
47#include <asm/proto.h> 48#include <asm/proto.h>
48#include <asm/nmi.h> 49#include <asm/nmi.h>
49 50
50extern struct gate_struct idt_table[256];
51
52asmlinkage void divide_error(void); 51asmlinkage void divide_error(void);
53asmlinkage void debug(void); 52asmlinkage void debug(void);
54asmlinkage void nmi(void); 53asmlinkage void nmi(void);
@@ -71,18 +70,20 @@ asmlinkage void alignment_check(void);
71asmlinkage void machine_check(void); 70asmlinkage void machine_check(void);
72asmlinkage void spurious_interrupt_bug(void); 71asmlinkage void spurious_interrupt_bug(void);
73 72
74struct notifier_block *die_chain; 73ATOMIC_NOTIFIER_HEAD(die_chain);
75static DEFINE_SPINLOCK(die_notifier_lock);
76 74
77int register_die_notifier(struct notifier_block *nb) 75int register_die_notifier(struct notifier_block *nb)
78{ 76{
79 int err = 0; 77 vmalloc_sync_all();
80 unsigned long flags; 78 return atomic_notifier_chain_register(&die_chain, nb);
81 spin_lock_irqsave(&die_notifier_lock, flags); 79}
82 err = notifier_chain_register(&die_chain, nb); 80EXPORT_SYMBOL(register_die_notifier);
83 spin_unlock_irqrestore(&die_notifier_lock, flags); 81
84 return err; 82int unregister_die_notifier(struct notifier_block *nb)
83{
84 return atomic_notifier_chain_unregister(&die_chain, nb);
85} 85}
86EXPORT_SYMBOL(unregister_die_notifier);
86 87
87static inline void conditional_sti(struct pt_regs *regs) 88static inline void conditional_sti(struct pt_regs *regs)
88{ 89{
@@ -90,6 +91,22 @@ static inline void conditional_sti(struct pt_regs *regs)
90 local_irq_enable(); 91 local_irq_enable();
91} 92}
92 93
94static inline void preempt_conditional_sti(struct pt_regs *regs)
95{
96 preempt_disable();
97 if (regs->eflags & X86_EFLAGS_IF)
98 local_irq_enable();
99}
100
101static inline void preempt_conditional_cli(struct pt_regs *regs)
102{
103 if (regs->eflags & X86_EFLAGS_IF)
104 local_irq_disable();
105 /* Make sure to not schedule here because we could be running
106 on an exception stack. */
107 preempt_enable_no_resched();
108}
109
93static int kstack_depth_to_print = 10; 110static int kstack_depth_to_print = 10;
94 111
95#ifdef CONFIG_KALLSYMS 112#ifdef CONFIG_KALLSYMS
@@ -108,7 +125,7 @@ int printk_address(unsigned long address)
108 if (!modname) 125 if (!modname)
109 modname = delim = ""; 126 modname = delim = "";
110 return printk("<%016lx>{%s%s%s%s%+ld}", 127 return printk("<%016lx>{%s%s%s%s%+ld}",
111 address,delim,modname,delim,symname,offset); 128 address, delim, modname, delim, symname, offset);
112} 129}
113#else 130#else
114int printk_address(unsigned long address) 131int printk_address(unsigned long address)
@@ -320,13 +337,12 @@ void show_registers(struct pt_regs *regs)
320 show_stack(NULL, (unsigned long*)rsp); 337 show_stack(NULL, (unsigned long*)rsp);
321 338
322 printk("\nCode: "); 339 printk("\nCode: ");
323 if(regs->rip < PAGE_OFFSET) 340 if (regs->rip < PAGE_OFFSET)
324 goto bad; 341 goto bad;
325 342
326 for(i=0;i<20;i++) 343 for (i=0; i<20; i++) {
327 {
328 unsigned char c; 344 unsigned char c;
329 if(__get_user(c, &((unsigned char*)regs->rip)[i])) { 345 if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
330bad: 346bad:
331 printk(" Bad RIP value."); 347 printk(" Bad RIP value.");
332 break; 348 break;
@@ -371,6 +387,7 @@ void out_of_line_bug(void)
371 387
372static DEFINE_SPINLOCK(die_lock); 388static DEFINE_SPINLOCK(die_lock);
373static int die_owner = -1; 389static int die_owner = -1;
390static unsigned int die_nest_count;
374 391
375unsigned __kprobes long oops_begin(void) 392unsigned __kprobes long oops_begin(void)
376{ 393{
@@ -385,6 +402,7 @@ unsigned __kprobes long oops_begin(void)
385 else 402 else
386 spin_lock(&die_lock); 403 spin_lock(&die_lock);
387 } 404 }
405 die_nest_count++;
388 die_owner = cpu; 406 die_owner = cpu;
389 console_verbose(); 407 console_verbose();
390 bust_spinlocks(1); 408 bust_spinlocks(1);
@@ -395,7 +413,13 @@ void __kprobes oops_end(unsigned long flags)
395{ 413{
396 die_owner = -1; 414 die_owner = -1;
397 bust_spinlocks(0); 415 bust_spinlocks(0);
398 spin_unlock_irqrestore(&die_lock, flags); 416 die_nest_count--;
417 if (die_nest_count)
418 /* We still own the lock */
419 local_irq_restore(flags);
420 else
421 /* Nest count reaches zero, release the lock. */
422 spin_unlock_irqrestore(&die_lock, flags);
399 if (panic_on_oops) 423 if (panic_on_oops)
400 panic("Oops"); 424 panic("Oops");
401} 425}
@@ -420,6 +444,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
420 printk(KERN_ALERT "RIP "); 444 printk(KERN_ALERT "RIP ");
421 printk_address(regs->rip); 445 printk_address(regs->rip);
422 printk(" RSP <%016lx>\n", regs->rsp); 446 printk(" RSP <%016lx>\n", regs->rsp);
447 if (kexec_should_crash(current))
448 crash_kexec(regs);
423} 449}
424 450
425void die(const char * str, struct pt_regs * regs, long err) 451void die(const char * str, struct pt_regs * regs, long err)
@@ -442,10 +468,14 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
442 */ 468 */
443 printk(str, safe_smp_processor_id()); 469 printk(str, safe_smp_processor_id());
444 show_registers(regs); 470 show_registers(regs);
471 if (kexec_should_crash(current))
472 crash_kexec(regs);
445 if (panic_on_timeout || panic_on_oops) 473 if (panic_on_timeout || panic_on_oops)
446 panic("nmi watchdog"); 474 panic("nmi watchdog");
447 printk("console shuts up ...\n"); 475 printk("console shuts up ...\n");
448 oops_end(flags); 476 oops_end(flags);
477 nmi_exit();
478 local_irq_enable();
449 do_exit(SIGSEGV); 479 do_exit(SIGSEGV);
450} 480}
451 481
@@ -455,8 +485,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
455{ 485{
456 struct task_struct *tsk = current; 486 struct task_struct *tsk = current;
457 487
458 conditional_sti(regs);
459
460 tsk->thread.error_code = error_code; 488 tsk->thread.error_code = error_code;
461 tsk->thread.trap_no = trapnr; 489 tsk->thread.trap_no = trapnr;
462 490
@@ -465,7 +493,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
465 printk(KERN_INFO 493 printk(KERN_INFO
466 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", 494 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
467 tsk->comm, tsk->pid, str, 495 tsk->comm, tsk->pid, str,
468 regs->rip,regs->rsp,error_code); 496 regs->rip, regs->rsp, error_code);
469 497
470 if (info) 498 if (info)
471 force_sig_info(signr, info, tsk); 499 force_sig_info(signr, info, tsk);
@@ -479,9 +507,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
479 { 507 {
480 const struct exception_table_entry *fixup; 508 const struct exception_table_entry *fixup;
481 fixup = search_exception_tables(regs->rip); 509 fixup = search_exception_tables(regs->rip);
482 if (fixup) { 510 if (fixup)
483 regs->rip = fixup->fixup; 511 regs->rip = fixup->fixup;
484 } else 512 else
485 die(str, regs, error_code); 513 die(str, regs, error_code);
486 return; 514 return;
487 } 515 }
@@ -493,6 +521,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
493 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 521 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
494 == NOTIFY_STOP) \ 522 == NOTIFY_STOP) \
495 return; \ 523 return; \
524 conditional_sti(regs); \
496 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 525 do_trap(trapnr, signr, str, regs, error_code, NULL); \
497} 526}
498 527
@@ -507,6 +536,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
507 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 536 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
508 == NOTIFY_STOP) \ 537 == NOTIFY_STOP) \
509 return; \ 538 return; \
539 conditional_sti(regs); \
510 do_trap(trapnr, signr, str, regs, error_code, &info); \ 540 do_trap(trapnr, signr, str, regs, error_code, &info); \
511} 541}
512 542
@@ -520,7 +550,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
520DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 550DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
521DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 551DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
522DO_ERROR(18, SIGSEGV, "reserved", reserved) 552DO_ERROR(18, SIGSEGV, "reserved", reserved)
523DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 553
554/* Runs on IST stack */
555asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
556{
557 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
558 12, SIGBUS) == NOTIFY_STOP)
559 return;
560 preempt_conditional_sti(regs);
561 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
562 preempt_conditional_cli(regs);
563}
524 564
525asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) 565asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
526{ 566{
@@ -554,7 +594,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
554 printk(KERN_INFO 594 printk(KERN_INFO
555 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", 595 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
556 tsk->comm, tsk->pid, 596 tsk->comm, tsk->pid,
557 regs->rip,regs->rsp,error_code); 597 regs->rip, regs->rsp, error_code);
558 598
559 force_sig(SIGSEGV, tsk); 599 force_sig(SIGSEGV, tsk);
560 return; 600 return;
@@ -654,8 +694,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
654 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { 694 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
655 return; 695 return;
656 } 696 }
697 preempt_conditional_sti(regs);
657 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 698 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
658 return; 699 preempt_conditional_cli(regs);
659} 700}
660 701
661/* Help handler running on IST stack to switch back to user stack 702/* Help handler running on IST stack to switch back to user stack
@@ -693,7 +734,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
693 SIGTRAP) == NOTIFY_STOP) 734 SIGTRAP) == NOTIFY_STOP)
694 return; 735 return;
695 736
696 conditional_sti(regs); 737 preempt_conditional_sti(regs);
697 738
698 /* Mask out spurious debug traps due to lazy DR7 setting */ 739 /* Mask out spurious debug traps due to lazy DR7 setting */
699 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 740 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
@@ -738,11 +779,13 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
738 779
739clear_dr7: 780clear_dr7:
740 set_debugreg(0UL, 7); 781 set_debugreg(0UL, 7);
782 preempt_conditional_cli(regs);
741 return; 783 return;
742 784
743clear_TF_reenable: 785clear_TF_reenable:
744 set_tsk_thread_flag(tsk, TIF_SINGLESTEP); 786 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
745 regs->eflags &= ~TF_MASK; 787 regs->eflags &= ~TF_MASK;
788 preempt_conditional_cli(regs);
746} 789}
747 790
748static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) 791static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -958,14 +1001,14 @@ void __init trap_init(void)
958static int __init oops_dummy(char *s) 1001static int __init oops_dummy(char *s)
959{ 1002{
960 panic_on_oops = 1; 1003 panic_on_oops = 1;
961 return -1; 1004 return 1;
962} 1005}
963__setup("oops=", oops_dummy); 1006__setup("oops=", oops_dummy);
964 1007
965static int __init kstack_setup(char *s) 1008static int __init kstack_setup(char *s)
966{ 1009{
967 kstack_depth_to_print = simple_strtoul(s,NULL,0); 1010 kstack_depth_to_print = simple_strtoul(s,NULL,0);
968 return 0; 1011 return 1;
969} 1012}
970__setup("kstack=", kstack_setup); 1013__setup("kstack=", kstack_setup);
971 1014
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 74db0062d4a2..b81f473c4a19 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -20,6 +20,12 @@ SECTIONS
20 phys_startup_64 = startup_64 - LOAD_OFFSET; 20 phys_startup_64 = startup_64 - LOAD_OFFSET;
21 _text = .; /* Text and read-only data */ 21 _text = .; /* Text and read-only data */
22 .text : AT(ADDR(.text) - LOAD_OFFSET) { 22 .text : AT(ADDR(.text) - LOAD_OFFSET) {
23 /* First the code that has to be first for bootstrapping */
24 *(.bootstrap.text)
25 /* Then all the functions that are "hot" in profiles, to group them
26 onto the same hugetlb entry */
27 #include "functionlist"
28 /* Then the rest */
23 *(.text) 29 *(.text)
24 SCHED_TEXT 30 SCHED_TEXT
25 LOCK_TEXT 31 LOCK_TEXT
@@ -59,7 +65,7 @@ SECTIONS
59 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { 65 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
60 *(.data.cacheline_aligned) 66 *(.data.cacheline_aligned)
61 } 67 }
62 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 68 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
63 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { 69 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
64 *(.data.read_mostly) 70 *(.data.read_mostly)
65 } 71 }
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 3496abc8d372..1def21c9f7cd 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -102,8 +102,6 @@ EXPORT_SYMBOL(cpu_callout_map);
102EXPORT_SYMBOL(screen_info); 102EXPORT_SYMBOL(screen_info);
103#endif 103#endif
104 104
105EXPORT_SYMBOL(get_wchan);
106
107EXPORT_SYMBOL(rtc_lock); 105EXPORT_SYMBOL(rtc_lock);
108 106
109EXPORT_SYMBOL_GPL(set_nmi_callback); 107EXPORT_SYMBOL_GPL(set_nmi_callback);
@@ -114,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
114#undef memcpy 112#undef memcpy
115#undef memset 113#undef memset
116#undef memmove 114#undef memmove
117#undef strlen
118 115
119extern void * memset(void *,int,__kernel_size_t); 116extern void * memset(void *,int,__kernel_size_t);
120extern size_t strlen(const char *); 117extern size_t strlen(const char *);
@@ -123,7 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
123extern void * __memcpy(void *,const void *,__kernel_size_t); 120extern void * __memcpy(void *,const void *,__kernel_size_t);
124 121
125EXPORT_SYMBOL(memset); 122EXPORT_SYMBOL(memset);
126EXPORT_SYMBOL(strlen);
127EXPORT_SYMBOL(memmove); 123EXPORT_SYMBOL(memmove);
128EXPORT_SYMBOL(memcpy); 124EXPORT_SYMBOL(memcpy);
129EXPORT_SYMBOL(__memcpy); 125EXPORT_SYMBOL(__memcpy);
@@ -143,16 +139,12 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
143EXPORT_SYMBOL(empty_zero_page); 139EXPORT_SYMBOL(empty_zero_page);
144 140
145EXPORT_SYMBOL(die_chain); 141EXPORT_SYMBOL(die_chain);
146EXPORT_SYMBOL(register_die_notifier);
147 142
148#ifdef CONFIG_SMP 143#ifdef CONFIG_SMP
149EXPORT_SYMBOL(cpu_sibling_map); 144EXPORT_SYMBOL(cpu_sibling_map);
150EXPORT_SYMBOL(smp_num_siblings); 145EXPORT_SYMBOL(smp_num_siblings);
151#endif 146#endif
152 147
153extern void do_softirq_thunk(void);
154EXPORT_SYMBOL(do_softirq_thunk);
155
156#ifdef CONFIG_BUG 148#ifdef CONFIG_BUG
157EXPORT_SYMBOL(out_of_line_bug); 149EXPORT_SYMBOL(out_of_line_bug);
158#endif 150#endif
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index acc1e2ca7ed7..e49af0032e94 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -42,7 +42,6 @@
42 thunk rwsem_wake_thunk,rwsem_wake 42 thunk rwsem_wake_thunk,rwsem_wake
43 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake 43 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
44#endif 44#endif
45 thunk do_softirq_thunk,do_softirq
46 45
47 thunk __down_failed,__down 46 thunk __down_failed,__down
48 thunk_retrax __down_failed_interruptible,__down_interruptible 47 thunk_retrax __down_failed_interruptible,__down_interruptible
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2e7c3c8ffe03..55250593d8c9 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address)
264 return -1; 264 return -1;
265 if (pgd_none(*pgd)) 265 if (pgd_none(*pgd))
266 set_pgd(pgd, *pgd_ref); 266 set_pgd(pgd, *pgd_ref);
267 else
268 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
267 269
268 /* Below here mismatches are bugs because these lower tables 270 /* Below here mismatches are bugs because these lower tables
269 are shared */ 271 are shared */
@@ -312,21 +314,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
312 unsigned long flags; 314 unsigned long flags;
313 siginfo_t info; 315 siginfo_t info;
314 316
317 tsk = current;
318 mm = tsk->mm;
319 prefetchw(&mm->mmap_sem);
320
315 /* get the address */ 321 /* get the address */
316 __asm__("movq %%cr2,%0":"=r" (address)); 322 __asm__("movq %%cr2,%0":"=r" (address));
317 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
318 SIGSEGV) == NOTIFY_STOP)
319 return;
320
321 if (likely(regs->eflags & X86_EFLAGS_IF))
322 local_irq_enable();
323 323
324 if (unlikely(page_fault_trace))
325 printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
326 regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
327
328 tsk = current;
329 mm = tsk->mm;
330 info.si_code = SEGV_MAPERR; 324 info.si_code = SEGV_MAPERR;
331 325
332 326
@@ -351,10 +345,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
351 */ 345 */
352 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && 346 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
353 ((address >= VMALLOC_START && address < VMALLOC_END))) { 347 ((address >= VMALLOC_START && address < VMALLOC_END))) {
354 if (vmalloc_fault(address) < 0) 348 if (vmalloc_fault(address) >= 0)
355 goto bad_area_nosemaphore; 349 return;
356 return;
357 } 350 }
351 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
352 SIGSEGV) == NOTIFY_STOP)
353 return;
358 /* 354 /*
359 * Don't take the mm semaphore here. If we fixup a prefetch 355 * Don't take the mm semaphore here. If we fixup a prefetch
360 * fault we could otherwise deadlock. 356 * fault we could otherwise deadlock.
@@ -362,6 +358,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
362 goto bad_area_nosemaphore; 358 goto bad_area_nosemaphore;
363 } 359 }
364 360
361 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
362 SIGSEGV) == NOTIFY_STOP)
363 return;
364
365 if (likely(regs->eflags & X86_EFLAGS_IF))
366 local_irq_enable();
367
368 if (unlikely(page_fault_trace))
369 printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
370 regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
371
365 if (unlikely(error_code & PF_RSVD)) 372 if (unlikely(error_code & PF_RSVD))
366 pgtable_bad(address, regs, error_code); 373 pgtable_bad(address, regs, error_code);
367 374
@@ -571,9 +578,51 @@ do_sigbus:
571 return; 578 return;
572} 579}
573 580
581DEFINE_SPINLOCK(pgd_lock);
582struct page *pgd_list;
583
584void vmalloc_sync_all(void)
585{
586 /* Note that races in the updates of insync and start aren't
587 problematic:
588 insync can only get set bits added, and updates to start are only
589 improving performance (without affecting correctness if undone). */
590 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
591 static unsigned long start = VMALLOC_START & PGDIR_MASK;
592 unsigned long address;
593
594 for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
595 if (!test_bit(pgd_index(address), insync)) {
596 const pgd_t *pgd_ref = pgd_offset_k(address);
597 struct page *page;
598
599 if (pgd_none(*pgd_ref))
600 continue;
601 spin_lock(&pgd_lock);
602 for (page = pgd_list; page;
603 page = (struct page *)page->index) {
604 pgd_t *pgd;
605 pgd = (pgd_t *)page_address(page) + pgd_index(address);
606 if (pgd_none(*pgd))
607 set_pgd(pgd, *pgd_ref);
608 else
609 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
610 }
611 spin_unlock(&pgd_lock);
612 set_bit(pgd_index(address), insync);
613 }
614 if (address == start)
615 start = address + PGDIR_SIZE;
616 }
617 /* Check that there is no need to do the same for the modules area. */
618 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
619 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
620 (__START_KERNEL & PGDIR_MASK)));
621}
622
574static int __init enable_pagefaulttrace(char *str) 623static int __init enable_pagefaulttrace(char *str)
575{ 624{
576 page_fault_trace = 1; 625 page_fault_trace = 1;
577 return 0; 626 return 1;
578} 627}
579__setup("pagefaulttrace", enable_pagefaulttrace); 628__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 7af1742aa958..4ba34e95d835 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -72,7 +72,7 @@ void show_mem(void)
72 show_free_areas(); 72 show_free_areas();
73 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 73 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
74 74
75 for_each_pgdat(pgdat) { 75 for_each_online_pgdat(pgdat) {
76 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 76 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 page = pfn_to_page(pgdat->node_start_pfn + i); 77 page = pfn_to_page(pgdat->node_start_pfn + i);
78 total++; 78 total++;
@@ -94,7 +94,7 @@ void show_mem(void)
94 94
95int after_bootmem; 95int after_bootmem;
96 96
97static void *spp_getpage(void) 97static __init void *spp_getpage(void)
98{ 98{
99 void *ptr; 99 void *ptr;
100 if (after_bootmem) 100 if (after_bootmem)
@@ -108,7 +108,7 @@ static void *spp_getpage(void)
108 return ptr; 108 return ptr;
109} 109}
110 110
111static void set_pte_phys(unsigned long vaddr, 111static __init void set_pte_phys(unsigned long vaddr,
112 unsigned long phys, pgprot_t prot) 112 unsigned long phys, pgprot_t prot)
113{ 113{
114 pgd_t *pgd; 114 pgd_t *pgd;
@@ -157,7 +157,8 @@ static void set_pte_phys(unsigned long vaddr,
157} 157}
158 158
159/* NOTE: this is meant to be run only at boot */ 159/* NOTE: this is meant to be run only at boot */
160void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) 160void __init
161__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
161{ 162{
162 unsigned long address = __fix_to_virt(idx); 163 unsigned long address = __fix_to_virt(idx);
163 164
@@ -225,6 +226,33 @@ static __meminit void unmap_low_page(int i)
225 ti->allocated = 0; 226 ti->allocated = 0;
226} 227}
227 228
229/* Must run before zap_low_mappings */
230__init void *early_ioremap(unsigned long addr, unsigned long size)
231{
232 unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
233
234 /* actually usually some more */
235 if (size >= LARGE_PAGE_SIZE) {
236 printk("SMBIOS area too long %lu\n", size);
237 return NULL;
238 }
239 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
240 map += LARGE_PAGE_SIZE;
241 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
242 __flush_tlb();
243 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
244}
245
246/* To avoid virtual aliases later */
247__init void early_iounmap(void *addr, unsigned long size)
248{
249 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
250 printk("early_iounmap: bad address %p\n", addr);
251 set_pmd(temp_mappings[0].pmd, __pmd(0));
252 set_pmd(temp_mappings[1].pmd, __pmd(0));
253 __flush_tlb();
254}
255
228static void __meminit 256static void __meminit
229phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) 257phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
230{ 258{
@@ -277,7 +305,7 @@ static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned
277 if (paddr >= end) 305 if (paddr >= end)
278 break; 306 break;
279 307
280 if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { 308 if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
281 set_pud(pud, __pud(0)); 309 set_pud(pud, __pud(0));
282 continue; 310 continue;
283 } 311 }
@@ -344,7 +372,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
344 pud_t *pud; 372 pud_t *pud;
345 373
346 if (after_bootmem) 374 if (after_bootmem)
347 pud = pud_offset_k(pgd, __PAGE_OFFSET); 375 pud = pud_offset_k(pgd, start & PGDIR_MASK);
348 else 376 else
349 pud = alloc_low_page(&map, &pud_phys); 377 pud = alloc_low_page(&map, &pud_phys);
350 378
@@ -479,19 +507,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
479 507
480/* 508/*
481 * Memory hotplug specific functions 509 * Memory hotplug specific functions
482 * These are only for non-NUMA machines right now.
483 */ 510 */
484#ifdef CONFIG_MEMORY_HOTPLUG 511#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
485 512
486void online_page(struct page *page) 513void online_page(struct page *page)
487{ 514{
488 ClearPageReserved(page); 515 ClearPageReserved(page);
489 set_page_count(page, 1); 516 init_page_count(page);
490 __free_page(page); 517 __free_page(page);
491 totalram_pages++; 518 totalram_pages++;
492 num_physpages++; 519 num_physpages++;
493} 520}
494 521
522#ifndef CONFIG_MEMORY_HOTPLUG
523/*
524 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
525 * just online the pages.
526 */
527int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
528{
529 int err = -EIO;
530 unsigned long pfn;
531 unsigned long total = 0, mem = 0;
532 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
533 if (pfn_valid(pfn)) {
534 online_page(pfn_to_page(pfn));
535 err = 0;
536 mem++;
537 }
538 total++;
539 }
540 if (!err) {
541 z->spanned_pages += total;
542 z->present_pages += mem;
543 z->zone_pgdat->node_spanned_pages += total;
544 z->zone_pgdat->node_present_pages += mem;
545 }
546 return err;
547}
548#endif
549
550/*
551 * Memory is added always to NORMAL zone. This means you will never get
552 * additional DMA/DMA32 memory.
553 */
495int add_memory(u64 start, u64 size) 554int add_memory(u64 start, u64 size)
496{ 555{
497 struct pglist_data *pgdat = NODE_DATA(0); 556 struct pglist_data *pgdat = NODE_DATA(0);
@@ -592,7 +651,7 @@ void free_initmem(void)
592 addr = (unsigned long)(&__init_begin); 651 addr = (unsigned long)(&__init_begin);
593 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { 652 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
594 ClearPageReserved(virt_to_page(addr)); 653 ClearPageReserved(virt_to_page(addr));
595 set_page_count(virt_to_page(addr), 1); 654 init_page_count(virt_to_page(addr));
596 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 655 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
597 free_page(addr); 656 free_page(addr);
598 totalram_pages++; 657 totalram_pages++;
@@ -632,7 +691,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
632 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); 691 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
633 for (; start < end; start += PAGE_SIZE) { 692 for (; start < end; start += PAGE_SIZE) {
634 ClearPageReserved(virt_to_page(start)); 693 ClearPageReserved(virt_to_page(start));
635 set_page_count(virt_to_page(start), 1); 694 init_page_count(virt_to_page(start));
636 free_page(start); 695 free_page(start);
637 totalram_pages++; 696 totalram_pages++;
638 } 697 }
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index a5663e0bb01c..7c45c2d2b8b2 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -43,7 +43,7 @@ static __init int find_northbridge(void)
43int __init k8_scan_nodes(unsigned long start, unsigned long end) 43int __init k8_scan_nodes(unsigned long start, unsigned long end)
44{ 44{
45 unsigned long prevbase; 45 unsigned long prevbase;
46 struct node nodes[8]; 46 struct bootnode nodes[8];
47 int nodeid, i, nb; 47 int nodeid, i, nb;
48 unsigned char nodeids[8]; 48 unsigned char nodeids[8];
49 int found = 0; 49 int found = 0;
@@ -155,7 +155,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
155 if (!found) 155 if (!found)
156 return -1; 156 return -1;
157 157
158 memnode_shift = compute_hash_shift(nodes, numnodes); 158 memnode_shift = compute_hash_shift(nodes, 8);
159 if (memnode_shift < 0) { 159 if (memnode_shift < 0) {
160 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 160 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
161 return -1; 161 return -1;
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 6ef9f9a76235..b2fac14baac0 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -25,8 +25,7 @@
25struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 25struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
26bootmem_data_t plat_node_bdata[MAX_NUMNODES]; 26bootmem_data_t plat_node_bdata[MAX_NUMNODES];
27 27
28int memnode_shift; 28struct memnode memnode;
29u8 memnodemap[NODEMAPSIZE];
30 29
31unsigned char cpu_to_node[NR_CPUS] __read_mostly = { 30unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
32 [0 ... NR_CPUS-1] = NUMA_NO_NODE 31 [0 ... NR_CPUS-1] = NUMA_NO_NODE
@@ -47,7 +46,7 @@ int numa_off __initdata;
47 * -1 if node overlap or lost ram (shift too big) 46 * -1 if node overlap or lost ram (shift too big)
48 */ 47 */
49static int __init 48static int __init
50populate_memnodemap(const struct node *nodes, int numnodes, int shift) 49populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
51{ 50{
52 int i; 51 int i;
53 int res = -1; 52 int res = -1;
@@ -74,7 +73,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift)
74 return res; 73 return res;
75} 74}
76 75
77int __init compute_hash_shift(struct node *nodes, int numnodes) 76int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
78{ 77{
79 int shift = 20; 78 int shift = 20;
80 79
@@ -101,11 +100,30 @@ int early_pfn_to_nid(unsigned long pfn)
101} 100}
102#endif 101#endif
103 102
103static void * __init
104early_node_mem(int nodeid, unsigned long start, unsigned long end,
105 unsigned long size)
106{
107 unsigned long mem = find_e820_area(start, end, size);
108 void *ptr;
109 if (mem != -1L)
110 return __va(mem);
111 ptr = __alloc_bootmem_nopanic(size,
112 SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
113 if (ptr == 0) {
114 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
115 size, nodeid);
116 return NULL;
117 }
118 return ptr;
119}
120
104/* Initialize bootmem allocator for a node */ 121/* Initialize bootmem allocator for a node */
105void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 122void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
106{ 123{
107 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; 124 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
108 unsigned long nodedata_phys; 125 unsigned long nodedata_phys;
126 void *bootmap;
109 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 127 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
110 128
111 start = round_up(start, ZONE_ALIGN); 129 start = round_up(start, ZONE_ALIGN);
@@ -115,13 +133,11 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
115 start_pfn = start >> PAGE_SHIFT; 133 start_pfn = start >> PAGE_SHIFT;
116 end_pfn = end >> PAGE_SHIFT; 134 end_pfn = end >> PAGE_SHIFT;
117 135
118 nodedata_phys = find_e820_area(start, end, pgdat_size); 136 node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
119 if (nodedata_phys == -1L) 137 if (node_data[nodeid] == NULL)
120 panic("Cannot find memory pgdat in node %d\n", nodeid); 138 return;
121 139 nodedata_phys = __pa(node_data[nodeid]);
122 Dprintk("nodedata_phys %lx\n", nodedata_phys);
123 140
124 node_data[nodeid] = phys_to_virt(nodedata_phys);
125 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 141 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
126 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; 142 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
127 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 143 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
@@ -130,9 +146,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
130 /* Find a place for the bootmem map */ 146 /* Find a place for the bootmem map */
131 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 147 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
132 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 148 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
133 bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); 149 bootmap = early_node_mem(nodeid, bootmap_start, end,
134 if (bootmap_start == -1L) 150 bootmap_pages<<PAGE_SHIFT);
135 panic("Not enough continuous space for bootmap on node %d", nodeid); 151 if (bootmap == NULL) {
152 if (nodedata_phys < start || nodedata_phys >= end)
153 free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
154 node_data[nodeid] = NULL;
155 return;
156 }
157 bootmap_start = __pa(bootmap);
136 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); 158 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
137 159
138 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 160 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
@@ -143,13 +165,16 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
143 165
144 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 166 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
145 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); 167 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
168#ifdef CONFIG_ACPI_NUMA
169 srat_reserve_add_area(nodeid);
170#endif
146 node_set_online(nodeid); 171 node_set_online(nodeid);
147} 172}
148 173
149/* Initialize final allocator for a zone */ 174/* Initialize final allocator for a zone */
150void __init setup_node_zones(int nodeid) 175void __init setup_node_zones(int nodeid)
151{ 176{
152 unsigned long start_pfn, end_pfn; 177 unsigned long start_pfn, end_pfn, memmapsize, limit;
153 unsigned long zones[MAX_NR_ZONES]; 178 unsigned long zones[MAX_NR_ZONES];
154 unsigned long holes[MAX_NR_ZONES]; 179 unsigned long holes[MAX_NR_ZONES];
155 180
@@ -159,6 +184,18 @@ void __init setup_node_zones(int nodeid)
159 Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", 184 Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n",
160 nodeid, start_pfn, end_pfn); 185 nodeid, start_pfn, end_pfn);
161 186
187 /* Try to allocate mem_map at end to not fill up precious <4GB
188 memory. */
189 memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
190 limit = end_pfn << PAGE_SHIFT;
191#ifdef CONFIG_FLAT_NODE_MEM_MAP
192 NODE_DATA(nodeid)->node_mem_map =
193 __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
194 memmapsize, SMP_CACHE_BYTES,
195 round_down(limit - memmapsize, PAGE_SIZE),
196 limit);
197#endif
198
162 size_zones(zones, holes, start_pfn, end_pfn); 199 size_zones(zones, holes, start_pfn, end_pfn);
163 free_area_init_node(nodeid, NODE_DATA(nodeid), zones, 200 free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
164 start_pfn, holes); 201 start_pfn, holes);
@@ -191,7 +228,7 @@ int numa_fake __initdata = 0;
191static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 228static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
192{ 229{
193 int i; 230 int i;
194 struct node nodes[MAX_NUMNODES]; 231 struct bootnode nodes[MAX_NUMNODES];
195 unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; 232 unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
196 233
197 /* Kludge needed for the hash function */ 234 /* Kludge needed for the hash function */
@@ -326,6 +363,8 @@ __init int numa_setup(char *opt)
326#ifdef CONFIG_ACPI_NUMA 363#ifdef CONFIG_ACPI_NUMA
327 if (!strncmp(opt,"noacpi",6)) 364 if (!strncmp(opt,"noacpi",6))
328 acpi_numa = -1; 365 acpi_numa = -1;
366 if (!strncmp(opt,"hotadd=", 7))
367 hotadd_percent = simple_strtoul(opt+7, NULL, 10);
329#endif 368#endif
330 return 1; 369 return 1;
331} 370}
@@ -351,14 +390,13 @@ void __init init_cpu_to_node(void)
351 continue; 390 continue;
352 if (apicid_to_node[apicid] == NUMA_NO_NODE) 391 if (apicid_to_node[apicid] == NUMA_NO_NODE)
353 continue; 392 continue;
354 cpu_to_node[i] = apicid_to_node[apicid]; 393 numa_set_node(i,apicid_to_node[apicid]);
355 } 394 }
356} 395}
357 396
358EXPORT_SYMBOL(cpu_to_node); 397EXPORT_SYMBOL(cpu_to_node);
359EXPORT_SYMBOL(node_to_cpumask); 398EXPORT_SYMBOL(node_to_cpumask);
360EXPORT_SYMBOL(memnode_shift); 399EXPORT_SYMBOL(memnode);
361EXPORT_SYMBOL(memnodemap);
362EXPORT_SYMBOL(node_data); 400EXPORT_SYMBOL(node_data);
363 401
364#ifdef CONFIG_DISCONTIGMEM 402#ifdef CONFIG_DISCONTIGMEM
@@ -369,21 +407,6 @@ EXPORT_SYMBOL(node_data);
369 * Should do that. 407 * Should do that.
370 */ 408 */
371 409
372/* Requires pfn_valid(pfn) to be true */
373struct page *pfn_to_page(unsigned long pfn)
374{
375 int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT);
376 return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;
377}
378EXPORT_SYMBOL(pfn_to_page);
379
380unsigned long page_to_pfn(struct page *page)
381{
382 return (long)(((page) - page_zone(page)->zone_mem_map) +
383 page_zone(page)->zone_start_pfn);
384}
385EXPORT_SYMBOL(page_to_pfn);
386
387int pfn_valid(unsigned long pfn) 410int pfn_valid(unsigned long pfn)
388{ 411{
389 unsigned nid; 412 unsigned nid;
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 35f1f1aab063..531ad21447b1 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
45 pte_t *pbase; 45 pte_t *pbase;
46 if (!base) 46 if (!base)
47 return NULL; 47 return NULL;
48 /*
49 * page_private is used to track the number of entries in
50 * the page table page have non standard attributes.
51 */
52 SetPagePrivate(base);
53 page_private(base) = 0;
54
48 address = __pa(address); 55 address = __pa(address);
49 addr = address & LARGE_PAGE_MASK; 56 addr = address & LARGE_PAGE_MASK;
50 pbase = (pte_t *)page_address(base); 57 pbase = (pte_t *)page_address(base);
@@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address)
77 on_each_cpu(flush_kernel_map, (void *)address, 1, 1); 84 on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
78} 85}
79 86
80struct deferred_page { 87static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
81 struct deferred_page *next;
82 struct page *fpage;
83 unsigned long address;
84};
85static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
86 88
87static inline void save_page(unsigned long address, struct page *fpage) 89static inline void save_page(struct page *fpage)
88{ 90{
89 struct deferred_page *df; 91 fpage->lru.next = (struct list_head *)deferred_pages;
90 df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); 92 deferred_pages = fpage;
91 if (!df) {
92 flush_map(address);
93 __free_page(fpage);
94 } else {
95 df->next = df_list;
96 df->fpage = fpage;
97 df->address = address;
98 df_list = df;
99 }
100} 93}
101 94
102/* 95/*
@@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
138 set_pte(kpte, pfn_pte(pfn, prot)); 131 set_pte(kpte, pfn_pte(pfn, prot));
139 } else { 132 } else {
140 /* 133 /*
141 * split_large_page will take the reference for this change_page_attr 134 * split_large_page will take the reference for this
142 * on the split page. 135 * change_page_attr on the split page.
143 */ 136 */
144 137
145 struct page *split; 138 struct page *split;
@@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
151 set_pte(kpte,mk_pte(split, ref_prot2)); 144 set_pte(kpte,mk_pte(split, ref_prot2));
152 kpte_page = split; 145 kpte_page = split;
153 } 146 }
154 get_page(kpte_page); 147 page_private(kpte_page)++;
155 } else if ((kpte_flags & _PAGE_PSE) == 0) { 148 } else if ((kpte_flags & _PAGE_PSE) == 0) {
156 set_pte(kpte, pfn_pte(pfn, ref_prot)); 149 set_pte(kpte, pfn_pte(pfn, ref_prot));
157 __put_page(kpte_page); 150 BUG_ON(page_private(kpte_page) == 0);
151 page_private(kpte_page)--;
158 } else 152 } else
159 BUG(); 153 BUG();
160 154
161 /* on x86-64 the direct mapping set at boot is not using 4k pages */ 155 /* on x86-64 the direct mapping set at boot is not using 4k pages */
162 BUG_ON(PageReserved(kpte_page)); 156 BUG_ON(PageReserved(kpte_page));
163 157
164 switch (page_count(kpte_page)) { 158 if (page_private(kpte_page) == 0) {
165 case 1: 159 save_page(kpte_page);
166 save_page(address, kpte_page);
167 revert_page(address, ref_prot); 160 revert_page(address, ref_prot);
168 break;
169 case 0:
170 BUG(); /* memleak and failed 2M page regeneration */
171 } 161 }
172 return 0; 162 return 0;
173} 163}
@@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
220 210
221void global_flush_tlb(void) 211void global_flush_tlb(void)
222{ 212{
223 struct deferred_page *df, *next_df; 213 struct page *dpage;
224 214
225 down_read(&init_mm.mmap_sem); 215 down_read(&init_mm.mmap_sem);
226 df = xchg(&df_list, NULL); 216 dpage = xchg(&deferred_pages, NULL);
227 up_read(&init_mm.mmap_sem); 217 up_read(&init_mm.mmap_sem);
228 flush_map((df && !df->next) ? df->address : 0); 218
229 for (; df; df = next_df) { 219 flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
230 next_df = df->next; 220 while (dpage) {
231 if (df->fpage) 221 struct page *tmp = dpage;
232 __free_page(df->fpage); 222 dpage = (struct page *)dpage->lru.next;
233 kfree(df); 223 ClearPagePrivate(tmp);
224 __free_page(tmp);
234 } 225 }
235} 226}
236 227
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index cd25300726fc..474df22c6ed2 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -15,15 +15,29 @@
15#include <linux/bitmap.h> 15#include <linux/bitmap.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/topology.h> 17#include <linux/topology.h>
18#include <linux/bootmem.h>
19#include <linux/mm.h>
18#include <asm/proto.h> 20#include <asm/proto.h>
19#include <asm/numa.h> 21#include <asm/numa.h>
20#include <asm/e820.h> 22#include <asm/e820.h>
21 23
24#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
25 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
26 && !defined(CONFIG_MEMORY_HOTPLUG)
27#define RESERVE_HOTADD 1
28#endif
29
22static struct acpi_table_slit *acpi_slit; 30static struct acpi_table_slit *acpi_slit;
23 31
24static nodemask_t nodes_parsed __initdata; 32static nodemask_t nodes_parsed __initdata;
25static nodemask_t nodes_found __initdata; 33static nodemask_t nodes_found __initdata;
26static struct node nodes[MAX_NUMNODES] __initdata; 34static struct bootnode nodes[MAX_NUMNODES] __initdata;
35static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
36static int found_add_area __initdata;
37int hotadd_percent __initdata = 0;
38#ifndef RESERVE_HOTADD
39#define hotadd_percent 0 /* Ignore all settings */
40#endif
27static u8 pxm2node[256] = { [0 ... 255] = 0xff }; 41static u8 pxm2node[256] = { [0 ... 255] = 0xff };
28 42
29/* Too small nodes confuse the VM badly. Usually they result 43/* Too small nodes confuse the VM badly. Usually they result
@@ -57,7 +71,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
57{ 71{
58 int i; 72 int i;
59 for_each_node_mask(i, nodes_parsed) { 73 for_each_node_mask(i, nodes_parsed) {
60 struct node *nd = &nodes[i]; 74 struct bootnode *nd = &nodes[i];
61 if (nd->start == nd->end) 75 if (nd->start == nd->end)
62 continue; 76 continue;
63 if (nd->end > start && nd->start < end) 77 if (nd->end > start && nd->start < end)
@@ -70,7 +84,11 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
70 84
71static __init void cutoff_node(int i, unsigned long start, unsigned long end) 85static __init void cutoff_node(int i, unsigned long start, unsigned long end)
72{ 86{
73 struct node *nd = &nodes[i]; 87 struct bootnode *nd = &nodes[i];
88
89 if (found_add_area)
90 return;
91
74 if (nd->start < start) { 92 if (nd->start < start) {
75 nd->start = start; 93 nd->start = start;
76 if (nd->end < nd->start) 94 if (nd->end < nd->start)
@@ -88,8 +106,11 @@ static __init void bad_srat(void)
88 int i; 106 int i;
89 printk(KERN_ERR "SRAT: SRAT not used.\n"); 107 printk(KERN_ERR "SRAT: SRAT not used.\n");
90 acpi_numa = -1; 108 acpi_numa = -1;
109 found_add_area = 0;
91 for (i = 0; i < MAX_LOCAL_APIC; i++) 110 for (i = 0; i < MAX_LOCAL_APIC; i++)
92 apicid_to_node[i] = NUMA_NO_NODE; 111 apicid_to_node[i] = NUMA_NO_NODE;
112 for (i = 0; i < MAX_NUMNODES; i++)
113 nodes_add[i].start = nodes[i].end = 0;
93} 114}
94 115
95static __init inline int srat_disabled(void) 116static __init inline int srat_disabled(void)
@@ -137,7 +158,8 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
137 int pxm, node; 158 int pxm, node;
138 if (srat_disabled()) 159 if (srat_disabled())
139 return; 160 return;
140 if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat(); 161 if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) {
162 bad_srat();
141 return; 163 return;
142 } 164 }
143 if (pa->flags.enabled == 0) 165 if (pa->flags.enabled == 0)
@@ -155,11 +177,116 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
155 pxm, pa->apic_id, node); 177 pxm, pa->apic_id, node);
156} 178}
157 179
180#ifdef RESERVE_HOTADD
181/*
182 * Protect against too large hotadd areas that would fill up memory.
183 */
184static int hotadd_enough_memory(struct bootnode *nd)
185{
186 static unsigned long allocated;
187 static unsigned long last_area_end;
188 unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
189 long mem = pages * sizeof(struct page);
190 unsigned long addr;
191 unsigned long allowed;
192 unsigned long oldpages = pages;
193
194 if (mem < 0)
195 return 0;
196 allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
197 allowed = (allowed / 100) * hotadd_percent;
198 if (allocated + mem > allowed) {
199 unsigned long range;
200 /* Give them at least part of their hotadd memory upto hotadd_percent
201 It would be better to spread the limit out
202 over multiple hotplug areas, but that is too complicated
203 right now */
204 if (allocated >= allowed)
205 return 0;
206 range = allowed - allocated;
207 pages = (range / PAGE_SIZE);
208 mem = pages * sizeof(struct page);
209 nd->end = nd->start + range;
210 }
211 /* Not completely fool proof, but a good sanity check */
212 addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
213 if (addr == -1UL)
214 return 0;
215 if (pages != oldpages)
216 printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
217 pages << PAGE_SHIFT);
218 last_area_end = addr + mem;
219 allocated += mem;
220 return 1;
221}
222
223/*
224 * It is fine to add this area to the nodes data it will be used later
225 * This code supports one contigious hot add area per node.
226 */
227static int reserve_hotadd(int node, unsigned long start, unsigned long end)
228{
229 unsigned long s_pfn = start >> PAGE_SHIFT;
230 unsigned long e_pfn = end >> PAGE_SHIFT;
231 int changed = 0;
232 struct bootnode *nd = &nodes_add[node];
233
234 /* I had some trouble with strange memory hotadd regions breaking
235 the boot. Be very strict here and reject anything unexpected.
236 If you want working memory hotadd write correct SRATs.
237
238 The node size check is a basic sanity check to guard against
239 mistakes */
240 if ((signed long)(end - start) < NODE_MIN_SIZE) {
241 printk(KERN_ERR "SRAT: Hotplug area too small\n");
242 return -1;
243 }
244
245 /* This check might be a bit too strict, but I'm keeping it for now. */
246 if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) {
247 printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
248 return -1;
249 }
250
251 if (!hotadd_enough_memory(&nodes_add[node])) {
252 printk(KERN_ERR "SRAT: Hotplug area too large\n");
253 return -1;
254 }
255
256 /* Looks good */
257
258 found_add_area = 1;
259 if (nd->start == nd->end) {
260 nd->start = start;
261 nd->end = end;
262 changed = 1;
263 } else {
264 if (nd->start == end) {
265 nd->start = start;
266 changed = 1;
267 }
268 if (nd->end == start) {
269 nd->end = end;
270 changed = 1;
271 }
272 if (!changed)
273 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
274 }
275
276 if ((nd->end >> PAGE_SHIFT) > end_pfn)
277 end_pfn = nd->end >> PAGE_SHIFT;
278
279 if (changed)
280 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
281 return 0;
282}
283#endif
284
158/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 285/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
159void __init 286void __init
160acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) 287acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
161{ 288{
162 struct node *nd; 289 struct bootnode *nd, oldnode;
163 unsigned long start, end; 290 unsigned long start, end;
164 int node, pxm; 291 int node, pxm;
165 int i; 292 int i;
@@ -172,6 +299,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
172 } 299 }
173 if (ma->flags.enabled == 0) 300 if (ma->flags.enabled == 0)
174 return; 301 return;
302 if (ma->flags.hot_pluggable && hotadd_percent == 0)
303 return;
175 start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); 304 start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
176 end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); 305 end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
177 pxm = ma->proximity_domain; 306 pxm = ma->proximity_domain;
@@ -181,10 +310,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
181 bad_srat(); 310 bad_srat();
182 return; 311 return;
183 } 312 }
184 /* It is fine to add this area to the nodes data it will be used later*/
185 if (ma->flags.hot_pluggable == 1)
186 printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
187 start, end);
188 i = conflicting_nodes(start, end); 313 i = conflicting_nodes(start, end);
189 if (i == node) { 314 if (i == node) {
190 printk(KERN_WARNING 315 printk(KERN_WARNING
@@ -199,6 +324,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
199 return; 324 return;
200 } 325 }
201 nd = &nodes[node]; 326 nd = &nodes[node];
327 oldnode = *nd;
202 if (!node_test_and_set(node, nodes_parsed)) { 328 if (!node_test_and_set(node, nodes_parsed)) {
203 nd->start = start; 329 nd->start = start;
204 nd->end = end; 330 nd->end = end;
@@ -208,8 +334,19 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
208 if (nd->end < end) 334 if (nd->end < end)
209 nd->end = end; 335 nd->end = end;
210 } 336 }
337
211 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 338 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
212 nd->start, nd->end); 339 nd->start, nd->end);
340
341#ifdef RESERVE_HOTADD
342 if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
343 /* Ignore hotadd region. Undo damage */
344 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
345 *nd = oldnode;
346 if ((nd->start | nd->end) == 0)
347 node_clear(node, nodes_parsed);
348 }
349#endif
213} 350}
214 351
215/* Sanity check to catch more bad SRATs (they are amazingly common). 352/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -225,10 +362,14 @@ static int nodes_cover_memory(void)
225 unsigned long e = nodes[i].end >> PAGE_SHIFT; 362 unsigned long e = nodes[i].end >> PAGE_SHIFT;
226 pxmram += e - s; 363 pxmram += e - s;
227 pxmram -= e820_hole_size(s, e); 364 pxmram -= e820_hole_size(s, e);
365 pxmram -= nodes_add[i].end - nodes_add[i].start;
366 if ((long)pxmram < 0)
367 pxmram = 0;
228 } 368 }
229 369
230 e820ram = end_pfn - e820_hole_size(0, end_pfn); 370 e820ram = end_pfn - e820_hole_size(0, end_pfn);
231 if (pxmram < e820ram) { 371 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
372 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
232 printk(KERN_ERR 373 printk(KERN_ERR
233 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", 374 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
234 (pxmram << PAGE_SHIFT) >> 20, 375 (pxmram << PAGE_SHIFT) >> 20,
@@ -257,9 +398,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
257 398
258 /* First clean up the node list */ 399 /* First clean up the node list */
259 for (i = 0; i < MAX_NUMNODES; i++) { 400 for (i = 0; i < MAX_NUMNODES; i++) {
260 cutoff_node(i, start, end); 401 cutoff_node(i, start, end);
261 if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) 402 if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
262 unparse_node(i); 403 unparse_node(i);
404 node_set_offline(i);
405 }
263 } 406 }
264 407
265 if (acpi_numa <= 0) 408 if (acpi_numa <= 0)
@@ -270,7 +413,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
270 return -1; 413 return -1;
271 } 414 }
272 415
273 memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); 416 memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
274 if (memnode_shift < 0) { 417 if (memnode_shift < 0) {
275 printk(KERN_ERR 418 printk(KERN_ERR
276 "SRAT: No NUMA node hash function found. Contact maintainer\n"); 419 "SRAT: No NUMA node hash function found. Contact maintainer\n");
@@ -281,6 +424,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
281 /* Finally register nodes */ 424 /* Finally register nodes */
282 for_each_node_mask(i, nodes_parsed) 425 for_each_node_mask(i, nodes_parsed)
283 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 426 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
427 /* Try again in case setup_node_bootmem missed one due
428 to missing bootmem */
429 for_each_node_mask(i, nodes_parsed)
430 if (!node_online(i))
431 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
432
284 for (i = 0; i < NR_CPUS; i++) { 433 for (i = 0; i < NR_CPUS; i++) {
285 if (cpu_to_node[i] == NUMA_NO_NODE) 434 if (cpu_to_node[i] == NUMA_NO_NODE)
286 continue; 435 continue;
@@ -302,6 +451,25 @@ static int node_to_pxm(int n)
302 return 0; 451 return 0;
303} 452}
304 453
454void __init srat_reserve_add_area(int nodeid)
455{
456 if (found_add_area && nodes_add[nodeid].end) {
457 u64 total_mb;
458
459 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
460 "for node %d at %Lx-%Lx\n",
461 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
462 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
463 >> PAGE_SHIFT;
464 total_mb *= sizeof(struct page);
465 total_mb >>= 20;
466 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
467 "pre-allocated memory.\n", (unsigned long long)total_mb);
468 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
469 nodes_add[nodeid].end - nodes_add[nodeid].start);
470 }
471}
472
305int __node_distance(int a, int b) 473int __node_distance(int a, int b)
306{ 474{
307 int index; 475 int index;
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
index a8f75a2a0f6f..a3f6ad570179 100644
--- a/arch/x86_64/pci/Makefile
+++ b/arch/x86_64/pci/Makefile
@@ -7,7 +7,7 @@ CFLAGS += -Iarch/i386/pci
7 7
8obj-y := i386.o 8obj-y := i386.o
9obj-$(CONFIG_PCI_DIRECT)+= direct.o 9obj-$(CONFIG_PCI_DIRECT)+= direct.o
10obj-y += fixup.o 10obj-y += fixup.o init.o
11obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
12obj-y += legacy.o irq.o common.o 12obj-y += legacy.o irq.o common.o
13# mmconfig has a 64bit special 13# mmconfig has a 64bit special
@@ -22,3 +22,4 @@ irq-y += ../../i386/pci/irq.o
22common-y += ../../i386/pci/common.o 22common-y += ../../i386/pci/common.o
23fixup-y += ../../i386/pci/fixup.o 23fixup-y += ../../i386/pci/fixup.o
24i386-y += ../../i386/pci/i386.o 24i386-y += ../../i386/pci/i386.o
25init-y += ../../i386/pci/init.o
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 18f371fe37f8..a2060e4d5de6 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -9,11 +9,16 @@
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/acpi.h> 10#include <linux/acpi.h>
11#include <linux/bitmap.h> 11#include <linux/bitmap.h>
12#include <asm/e820.h>
13
12#include "pci.h" 14#include "pci.h"
13 15
14#define MMCONFIG_APER_SIZE (256*1024*1024) 16#define MMCONFIG_APER_SIZE (256*1024*1024)
17/* Verify the first 16 busses. We assume that systems with more busses
18 get MCFG right. */
19#define MAX_CHECK_BUS 16
15 20
16static DECLARE_BITMAP(fallback_slots, 32); 21static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS);
17 22
18/* Static virtual mapping of the MMCONFIG aperture */ 23/* Static virtual mapping of the MMCONFIG aperture */
19struct mmcfg_virt { 24struct mmcfg_virt {
@@ -55,7 +60,8 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus)
55static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) 60static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
56{ 61{
57 char __iomem *addr; 62 char __iomem *addr;
58 if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) 63 if (seg == 0 && bus < MAX_CHECK_BUS &&
64 test_bit(32*bus + PCI_SLOT(devfn), fallback_slots))
59 return NULL; 65 return NULL;
60 addr = get_virt(seg, bus); 66 addr = get_virt(seg, bus);
61 if (!addr) 67 if (!addr)
@@ -69,8 +75,10 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
69 char __iomem *addr; 75 char __iomem *addr;
70 76
71 /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ 77 /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
72 if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) 78 if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
79 *value = -1;
73 return -EINVAL; 80 return -EINVAL;
81 }
74 82
75 addr = pci_dev_base(seg, bus, devfn); 83 addr = pci_dev_base(seg, bus, devfn);
76 if (!addr) 84 if (!addr)
@@ -129,43 +137,56 @@ static struct pci_raw_ops pci_mmcfg = {
129 Normally this can be expressed in the MCFG by not listing them 137 Normally this can be expressed in the MCFG by not listing them
130 and assigning suitable _SEGs, but this isn't implemented in some BIOS. 138 and assigning suitable _SEGs, but this isn't implemented in some BIOS.
131 Instead try to discover all devices on bus 0 that are unreachable using MM 139 Instead try to discover all devices on bus 0 that are unreachable using MM
132 and fallback for them. 140 and fallback for them. */
133 We only do this for bus 0/seg 0 */
134static __init void unreachable_devices(void) 141static __init void unreachable_devices(void)
135{ 142{
136 int i; 143 int i, k;
137 for (i = 0; i < 32; i++) { 144 /* Use the max bus number from ACPI here? */
138 u32 val1; 145 for (k = 0; k < MAX_CHECK_BUS; k++) {
139 char __iomem *addr; 146 for (i = 0; i < 32; i++) {
140 147 u32 val1;
141 pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); 148 char __iomem *addr;
142 if (val1 == 0xffffffff) 149
143 continue; 150 pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1);
144 addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); 151 if (val1 == 0xffffffff)
145 if (addr == NULL|| readl(addr) != val1) { 152 continue;
146 set_bit(i, &fallback_slots); 153 addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
154 if (addr == NULL|| readl(addr) != val1) {
155 set_bit(i + 32*k, fallback_slots);
156 printk(KERN_NOTICE
157 "PCI: No mmconfig possible on device %x:%x\n",
158 k, i);
159 }
147 } 160 }
148 } 161 }
149} 162}
150 163
151static int __init pci_mmcfg_init(void) 164void __init pci_mmcfg_init(void)
152{ 165{
153 int i; 166 int i;
154 167
155 if ((pci_probe & PCI_PROBE_MMCONF) == 0) 168 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
156 return 0; 169 return;
157 170
158 acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); 171 acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
159 if ((pci_mmcfg_config_num == 0) || 172 if ((pci_mmcfg_config_num == 0) ||
160 (pci_mmcfg_config == NULL) || 173 (pci_mmcfg_config == NULL) ||
161 (pci_mmcfg_config[0].base_address == 0)) 174 (pci_mmcfg_config[0].base_address == 0))
162 return 0; 175 return;
176
177 if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
178 pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
179 E820_RESERVED)) {
180 printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n");
181 printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
182 return;
183 }
163 184
164 /* RED-PEN i386 doesn't do _nocache right now */ 185 /* RED-PEN i386 doesn't do _nocache right now */
165 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); 186 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
166 if (pci_mmcfg_virt == NULL) { 187 if (pci_mmcfg_virt == NULL) {
167 printk("PCI: Can not allocate memory for mmconfig structures\n"); 188 printk("PCI: Can not allocate memory for mmconfig structures\n");
168 return 0; 189 return;
169 } 190 }
170 for (i = 0; i < pci_mmcfg_config_num; ++i) { 191 for (i = 0; i < pci_mmcfg_config_num; ++i) {
171 pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; 192 pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
@@ -173,7 +194,7 @@ static int __init pci_mmcfg_init(void)
173 if (!pci_mmcfg_virt[i].virt) { 194 if (!pci_mmcfg_virt[i].virt) {
174 printk("PCI: Cannot map mmconfig aperture for segment %d\n", 195 printk("PCI: Cannot map mmconfig aperture for segment %d\n",
175 pci_mmcfg_config[i].pci_segment_group_number); 196 pci_mmcfg_config[i].pci_segment_group_number);
176 return 0; 197 return;
177 } 198 }
178 printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); 199 printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address);
179 } 200 }
@@ -182,8 +203,4 @@ static int __init pci_mmcfg_init(void)
182 203
183 raw_pci_ops = &pci_mmcfg; 204 raw_pci_ops = &pci_mmcfg;
184 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 205 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
185
186 return 0;
187} 206}
188
189arch_initcall(pci_mmcfg_init);