author     Len Brown <len.brown@intel.com>    2005-12-06 17:31:30 -0500
committer  Len Brown <len.brown@intel.com>    2005-12-06 17:31:30 -0500
commit     3d5271f9883cba7b54762bc4fe027d4172f06db7 (patch)
tree       ab8a881a14478598a0c8bda0d26c62cdccfffd6d /arch/x86_64
parent     378b2556f4e09fa6f87ff0cb5c4395ff28257d02 (diff)
parent     9115a6c787596e687df03010d97fccc5e0762506 (diff)
Pull release into acpica branch
Diffstat (limited to 'arch/x86_64')
-rw-r--r--  arch/x86_64/Kconfig                |  70
-rw-r--r--  arch/x86_64/Kconfig.debug          |  19
-rw-r--r--  arch/x86_64/defconfig              |  98
-rw-r--r--  arch/x86_64/ia32/ia32_aout.c       |   4
-rw-r--r--  arch/x86_64/ia32/ia32_binfmt.c     |   4
-rw-r--r--  arch/x86_64/ia32/ia32_ioctl.c      | 131
-rw-r--r--  arch/x86_64/ia32/ia32_signal.c     |   6
-rw-r--r--  arch/x86_64/kernel/Makefile        |   3
-rw-r--r--  arch/x86_64/kernel/aperture.c      |   2
-rw-r--r--  arch/x86_64/kernel/apic.c          |  10
-rw-r--r--  arch/x86_64/kernel/e820.c          |   3
-rw-r--r--  arch/x86_64/kernel/entry.S         |   3
-rw-r--r--  arch/x86_64/kernel/head.S          |  77
-rw-r--r--  arch/x86_64/kernel/head64.c        |  14
-rw-r--r--  arch/x86_64/kernel/i8259.c         |  12
-rw-r--r--  arch/x86_64/kernel/io_apic.c       |  80
-rw-r--r--  arch/x86_64/kernel/kprobes.c       | 191
-rw-r--r--  arch/x86_64/kernel/mce.c           |  27
-rw-r--r--  arch/x86_64/kernel/mce_amd.c       | 538
-rw-r--r--  arch/x86_64/kernel/mpparse.c       |  23
-rw-r--r--  arch/x86_64/kernel/pci-gart.c      |  12
-rw-r--r--  arch/x86_64/kernel/pci-nommu.c     |   2
-rw-r--r--  arch/x86_64/kernel/process.c       | 123
-rw-r--r--  arch/x86_64/kernel/ptrace.c        |  43
-rw-r--r--  arch/x86_64/kernel/reboot.c        |   7
-rw-r--r--  arch/x86_64/kernel/setup.c         | 118
-rw-r--r--  arch/x86_64/kernel/setup64.c       |   6
-rw-r--r--  arch/x86_64/kernel/signal.c        |  17
-rw-r--r--  arch/x86_64/kernel/smp.c           |   7
-rw-r--r--  arch/x86_64/kernel/smpboot.c       | 120
-rw-r--r--  arch/x86_64/kernel/suspend.c       |  92
-rw-r--r--  arch/x86_64/kernel/suspend_asm.S   |  17
-rw-r--r--  arch/x86_64/kernel/sys_x86_64.c    |  14
-rw-r--r--  arch/x86_64/kernel/time.c          |  35
-rw-r--r--  arch/x86_64/kernel/traps.c         |  44
-rw-r--r--  arch/x86_64/kernel/vmlinux.lds.S   |   2
-rw-r--r--  arch/x86_64/kernel/x8664_ksyms.c   |   3
-rw-r--r--  arch/x86_64/lib/bitops.c           |  66
-rw-r--r--  arch/x86_64/lib/clear_page.S       |  38
-rw-r--r--  arch/x86_64/lib/copy_page.S        |  87
-rw-r--r--  arch/x86_64/lib/memcpy.S           |  93
-rw-r--r--  arch/x86_64/lib/memset.S           |  94
-rw-r--r--  arch/x86_64/mm/fault.c             |  19
-rw-r--r--  arch/x86_64/mm/init.c              | 129
-rw-r--r--  arch/x86_64/mm/ioremap.c           |   4
-rw-r--r--  arch/x86_64/mm/k8topology.c        |   1
-rw-r--r--  arch/x86_64/mm/numa.c              | 130
-rw-r--r--  arch/x86_64/mm/pageattr.c          |   2
-rw-r--r--  arch/x86_64/mm/srat.c              |   6
-rw-r--r--  arch/x86_64/oprofile/Kconfig       |   6
50 files changed, 1546 insertions(+), 1106 deletions(-)
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 21afa69a086d..6ece645e4dbe 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -226,22 +226,42 @@ config SCHED_SMT
226 226
227source "kernel/Kconfig.preempt" 227source "kernel/Kconfig.preempt"
228 228
229config K8_NUMA 229config NUMA
230 bool "K8 NUMA support" 230 bool "Non Uniform Memory Access (NUMA) Support"
231 select NUMA
232 depends on SMP 231 depends on SMP
233 help 232 help
234 Enable NUMA (Non Unified Memory Architecture) support for 233 Enable NUMA (Non Uniform Memory Access) support. The kernel
235 AMD Opteron Multiprocessor systems. The kernel will try to allocate 234 will try to allocate memory used by a CPU on the local memory
236 memory used by a CPU on the local memory controller of the CPU 235 controller of the CPU and add some more NUMA awareness to the kernel.
237 and add some more NUMA awareness to the kernel. 236 This code is recommended on all multiprocessor Opteron systems.
238 This code is recommended on all multiprocessor Opteron systems 237 If the system is EM64T, you should say N unless your system is EM64T
239 and normally doesn't hurt on others. 238 NUMA.
239
240config K8_NUMA
241 bool "Old style AMD Opteron NUMA detection"
242 depends on NUMA
243 default y
244 help
245 Enable K8 NUMA node topology detection. You should say Y here if
246 you have a multi processor AMD K8 system. This uses an old
 247 method to read the NUMA configuration directly from the builtin
248 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
249 instead, which also takes priority if both are compiled in.
250
251# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
252
253config X86_64_ACPI_NUMA
254 bool "ACPI NUMA detection"
255 depends on NUMA
256 select ACPI
257 select ACPI_NUMA
258 default y
259 help
260 Enable ACPI SRAT based node topology detection.
240 261
241config NUMA_EMU 262config NUMA_EMU
242 bool "NUMA emulation support" 263 bool "NUMA emulation"
243 select NUMA 264 depends on NUMA
244 depends on SMP
245 help 265 help
246 Enable NUMA emulation. A flat machine will be split 266 Enable NUMA emulation. A flat machine will be split
247 into virtual nodes when booted with "numa=fake=N", where N is the 267 into virtual nodes when booted with "numa=fake=N", where N is the
@@ -252,9 +272,6 @@ config ARCH_DISCONTIGMEM_ENABLE
252 depends on NUMA 272 depends on NUMA
253 default y 273 default y
254 274
255config NUMA
256 bool
257 default n
258 275
259config ARCH_DISCONTIGMEM_ENABLE 276config ARCH_DISCONTIGMEM_ENABLE
260 def_bool y 277 def_bool y
@@ -374,6 +391,14 @@ config X86_MCE_INTEL
374 Additional support for intel specific MCE features such as 391 Additional support for intel specific MCE features such as
375 the thermal monitor. 392 the thermal monitor.
376 393
394config X86_MCE_AMD
395 bool "AMD MCE features"
396 depends on X86_MCE && X86_LOCAL_APIC
397 default y
398 help
399 Additional support for AMD specific MCE features such as
400 the DRAM Error Threshold.
401
377config PHYSICAL_START 402config PHYSICAL_START
378 hex "Physical address where the kernel is loaded" if EMBEDDED 403 hex "Physical address where the kernel is loaded" if EMBEDDED
379 default "0x100000" 404 default "0x100000"
@@ -502,7 +527,7 @@ config IA32_EMULATION
502 left. 527 left.
503 528
504config IA32_AOUT 529config IA32_AOUT
505 bool "IA32 a.out support" 530 tristate "IA32 a.out support"
506 depends on IA32_EMULATION 531 depends on IA32_EMULATION
507 help 532 help
508 Support old a.out binaries in the 32bit emulation. 533 Support old a.out binaries in the 32bit emulation.
@@ -532,8 +557,21 @@ source "drivers/firmware/Kconfig"
532 557
533source fs/Kconfig 558source fs/Kconfig
534 559
560menu "Instrumentation Support"
561 depends on EXPERIMENTAL
562
535source "arch/x86_64/oprofile/Kconfig" 563source "arch/x86_64/oprofile/Kconfig"
536 564
565config KPROBES
566 bool "Kprobes (EXPERIMENTAL)"
567 help
568 Kprobes allows you to trap at almost any kernel address and
569 execute a callback function. register_kprobe() establishes
570 a probepoint and specifies the callback. Kprobes is useful
571 for kernel debugging, non-intrusive instrumentation and testing.
572 If in doubt, say "N".
573endmenu
574
537source "arch/x86_64/Kconfig.debug" 575source "arch/x86_64/Kconfig.debug"
538 576
539source "security/Kconfig" 577source "security/Kconfig"
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 9cf1410d2f5a..e2c6e64a85ec 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -2,15 +2,6 @@ menu "Kernel hacking"
2 2
3source "lib/Kconfig.debug" 3source "lib/Kconfig.debug"
4 4
5# !SMP for now because the context switch early causes GPF in segment reloading
6# and the GS base checking does the wrong thing then, causing a hang.
7config CHECKING
8 bool "Additional run-time checks"
9 depends on DEBUG_KERNEL && !SMP
10 help
11 Enables some internal consistency checks for kernel debugging.
12 You should normally say N.
13
14config INIT_DEBUG 5config INIT_DEBUG
15 bool "Debug __init statements" 6 bool "Debug __init statements"
16 depends on DEBUG_KERNEL 7 depends on DEBUG_KERNEL
@@ -33,16 +24,6 @@ config IOMMU_DEBUG
33 options. See Documentation/x86_64/boot-options.txt for more 24 options. See Documentation/x86_64/boot-options.txt for more
34 details. 25 details.
35 26
36config KPROBES
37 bool "Kprobes"
38 depends on DEBUG_KERNEL
39 help
40 Kprobes allows you to trap at almost any kernel address and
41 execute a callback function. register_kprobe() establishes
42 a probepoint and specifies the callback. Kprobes is useful
43 for kernel debugging, non-intrusive instrumentation and testing.
44 If in doubt, say "N".
45
46config IOMMU_LEAK 27config IOMMU_LEAK
47 bool "IOMMU leak tracing" 28 bool "IOMMU leak tracing"
48 depends on DEBUG_KERNEL 29 depends on DEBUG_KERNEL
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index f8db7e500fbf..5d56542fb68f 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.13-git11 3# Linux kernel version: 2.6.14-git7
4# Mon Sep 12 16:16:16 2005 4# Sat Nov 5 15:55:50 2005
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -35,7 +35,7 @@ CONFIG_POSIX_MQUEUE=y
35# CONFIG_BSD_PROCESS_ACCT is not set 35# CONFIG_BSD_PROCESS_ACCT is not set
36CONFIG_SYSCTL=y 36CONFIG_SYSCTL=y
37# CONFIG_AUDIT is not set 37# CONFIG_AUDIT is not set
38# CONFIG_HOTPLUG is not set 38CONFIG_HOTPLUG=y
39CONFIG_KOBJECT_UEVENT=y 39CONFIG_KOBJECT_UEVENT=y
40CONFIG_IKCONFIG=y 40CONFIG_IKCONFIG=y
41CONFIG_IKCONFIG_PROC=y 41CONFIG_IKCONFIG_PROC=y
@@ -93,10 +93,11 @@ CONFIG_PREEMPT_NONE=y
93# CONFIG_PREEMPT_VOLUNTARY is not set 93# CONFIG_PREEMPT_VOLUNTARY is not set
94# CONFIG_PREEMPT is not set 94# CONFIG_PREEMPT is not set
95CONFIG_PREEMPT_BKL=y 95CONFIG_PREEMPT_BKL=y
96CONFIG_NUMA=y
96CONFIG_K8_NUMA=y 97CONFIG_K8_NUMA=y
98CONFIG_X86_64_ACPI_NUMA=y
97# CONFIG_NUMA_EMU is not set 99# CONFIG_NUMA_EMU is not set
98CONFIG_ARCH_DISCONTIGMEM_ENABLE=y 100CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
99CONFIG_NUMA=y
100CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y 101CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
101CONFIG_ARCH_SPARSEMEM_ENABLE=y 102CONFIG_ARCH_SPARSEMEM_ENABLE=y
102CONFIG_SELECT_MEMORY_MODEL=y 103CONFIG_SELECT_MEMORY_MODEL=y
@@ -107,9 +108,10 @@ CONFIG_DISCONTIGMEM=y
107CONFIG_FLAT_NODE_MEM_MAP=y 108CONFIG_FLAT_NODE_MEM_MAP=y
108CONFIG_NEED_MULTIPLE_NODES=y 109CONFIG_NEED_MULTIPLE_NODES=y
109# CONFIG_SPARSEMEM_STATIC is not set 110# CONFIG_SPARSEMEM_STATIC is not set
111CONFIG_SPLIT_PTLOCK_CPUS=4
110CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y 112CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
111CONFIG_HAVE_DEC_LOCK=y
112CONFIG_NR_CPUS=32 113CONFIG_NR_CPUS=32
114CONFIG_HOTPLUG_CPU=y
113CONFIG_HPET_TIMER=y 115CONFIG_HPET_TIMER=y
114CONFIG_X86_PM_TIMER=y 116CONFIG_X86_PM_TIMER=y
115CONFIG_HPET_EMULATE_RTC=y 117CONFIG_HPET_EMULATE_RTC=y
@@ -117,6 +119,7 @@ CONFIG_GART_IOMMU=y
117CONFIG_SWIOTLB=y 119CONFIG_SWIOTLB=y
118CONFIG_X86_MCE=y 120CONFIG_X86_MCE=y
119CONFIG_X86_MCE_INTEL=y 121CONFIG_X86_MCE_INTEL=y
122CONFIG_X86_MCE_AMD=y
120CONFIG_PHYSICAL_START=0x100000 123CONFIG_PHYSICAL_START=0x100000
121# CONFIG_KEXEC is not set 124# CONFIG_KEXEC is not set
122CONFIG_SECCOMP=y 125CONFIG_SECCOMP=y
@@ -136,11 +139,15 @@ CONFIG_PM=y
136# CONFIG_PM_DEBUG is not set 139# CONFIG_PM_DEBUG is not set
137CONFIG_SOFTWARE_SUSPEND=y 140CONFIG_SOFTWARE_SUSPEND=y
138CONFIG_PM_STD_PARTITION="" 141CONFIG_PM_STD_PARTITION=""
142CONFIG_SUSPEND_SMP=y
139 143
140# 144#
141# ACPI (Advanced Configuration and Power Interface) Support 145# ACPI (Advanced Configuration and Power Interface) Support
142# 146#
143CONFIG_ACPI=y 147CONFIG_ACPI=y
148CONFIG_ACPI_SLEEP=y
149CONFIG_ACPI_SLEEP_PROC_FS=y
150CONFIG_ACPI_SLEEP_PROC_SLEEP=y
144CONFIG_ACPI_AC=y 151CONFIG_ACPI_AC=y
145CONFIG_ACPI_BATTERY=y 152CONFIG_ACPI_BATTERY=y
146CONFIG_ACPI_BUTTON=y 153CONFIG_ACPI_BUTTON=y
@@ -148,6 +155,7 @@ CONFIG_ACPI_BUTTON=y
148CONFIG_ACPI_HOTKEY=m 155CONFIG_ACPI_HOTKEY=m
149CONFIG_ACPI_FAN=y 156CONFIG_ACPI_FAN=y
150CONFIG_ACPI_PROCESSOR=y 157CONFIG_ACPI_PROCESSOR=y
158CONFIG_ACPI_HOTPLUG_CPU=y
151CONFIG_ACPI_THERMAL=y 159CONFIG_ACPI_THERMAL=y
152CONFIG_ACPI_NUMA=y 160CONFIG_ACPI_NUMA=y
153# CONFIG_ACPI_ASUS is not set 161# CONFIG_ACPI_ASUS is not set
@@ -158,7 +166,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001
158CONFIG_ACPI_EC=y 166CONFIG_ACPI_EC=y
159CONFIG_ACPI_POWER=y 167CONFIG_ACPI_POWER=y
160CONFIG_ACPI_SYSTEM=y 168CONFIG_ACPI_SYSTEM=y
161# CONFIG_ACPI_CONTAINER is not set 169CONFIG_ACPI_CONTAINER=y
162 170
163# 171#
164# CPU Frequency scaling 172# CPU Frequency scaling
@@ -293,7 +301,6 @@ CONFIG_IPV6=y
293# Network testing 301# Network testing
294# 302#
295# CONFIG_NET_PKTGEN is not set 303# CONFIG_NET_PKTGEN is not set
296# CONFIG_NETFILTER_NETLINK is not set
297# CONFIG_HAMRADIO is not set 304# CONFIG_HAMRADIO is not set
298# CONFIG_IRDA is not set 305# CONFIG_IRDA is not set
299# CONFIG_BT is not set 306# CONFIG_BT is not set
@@ -312,6 +319,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
312# CONFIG_DEBUG_DRIVER is not set 319# CONFIG_DEBUG_DRIVER is not set
313 320
314# 321#
322# Connector - unified userspace <-> kernelspace linker
323#
324# CONFIG_CONNECTOR is not set
325
326#
315# Memory Technology Devices (MTD) 327# Memory Technology Devices (MTD)
316# 328#
317# CONFIG_MTD is not set 329# CONFIG_MTD is not set
@@ -354,6 +366,11 @@ CONFIG_IOSCHED_NOOP=y
354# CONFIG_IOSCHED_AS is not set 366# CONFIG_IOSCHED_AS is not set
355CONFIG_IOSCHED_DEADLINE=y 367CONFIG_IOSCHED_DEADLINE=y
356CONFIG_IOSCHED_CFQ=y 368CONFIG_IOSCHED_CFQ=y
369# CONFIG_DEFAULT_AS is not set
370CONFIG_DEFAULT_DEADLINE=y
371# CONFIG_DEFAULT_CFQ is not set
372# CONFIG_DEFAULT_NOOP is not set
373CONFIG_DEFAULT_IOSCHED="cfq"
357# CONFIG_ATA_OVER_ETH is not set 374# CONFIG_ATA_OVER_ETH is not set
358 375
359# 376#
@@ -450,6 +467,7 @@ CONFIG_BLK_DEV_SD=y
450CONFIG_SCSI_SPI_ATTRS=y 467CONFIG_SCSI_SPI_ATTRS=y
451# CONFIG_SCSI_FC_ATTRS is not set 468# CONFIG_SCSI_FC_ATTRS is not set
452# CONFIG_SCSI_ISCSI_ATTRS is not set 469# CONFIG_SCSI_ISCSI_ATTRS is not set
470# CONFIG_SCSI_SAS_ATTRS is not set
453 471
454# 472#
455# SCSI low-level drivers 473# SCSI low-level drivers
@@ -469,20 +487,24 @@ CONFIG_AIC79XX_DEBUG_MASK=0
469# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set 487# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
470# CONFIG_MEGARAID_NEWGEN is not set 488# CONFIG_MEGARAID_NEWGEN is not set
471# CONFIG_MEGARAID_LEGACY is not set 489# CONFIG_MEGARAID_LEGACY is not set
490# CONFIG_MEGARAID_SAS is not set
472CONFIG_SCSI_SATA=y 491CONFIG_SCSI_SATA=y
473# CONFIG_SCSI_SATA_AHCI is not set 492# CONFIG_SCSI_SATA_AHCI is not set
474# CONFIG_SCSI_SATA_SVW is not set 493# CONFIG_SCSI_SATA_SVW is not set
475CONFIG_SCSI_ATA_PIIX=y 494CONFIG_SCSI_ATA_PIIX=y
476# CONFIG_SCSI_SATA_MV is not set 495# CONFIG_SCSI_SATA_MV is not set
477# CONFIG_SCSI_SATA_NV is not set 496CONFIG_SCSI_SATA_NV=y
478# CONFIG_SCSI_SATA_PROMISE is not set 497# CONFIG_SCSI_PDC_ADMA is not set
479# CONFIG_SCSI_SATA_QSTOR is not set 498# CONFIG_SCSI_SATA_QSTOR is not set
499# CONFIG_SCSI_SATA_PROMISE is not set
480# CONFIG_SCSI_SATA_SX4 is not set 500# CONFIG_SCSI_SATA_SX4 is not set
481# CONFIG_SCSI_SATA_SIL is not set 501# CONFIG_SCSI_SATA_SIL is not set
502# CONFIG_SCSI_SATA_SIL24 is not set
482# CONFIG_SCSI_SATA_SIS is not set 503# CONFIG_SCSI_SATA_SIS is not set
483# CONFIG_SCSI_SATA_ULI is not set 504# CONFIG_SCSI_SATA_ULI is not set
484CONFIG_SCSI_SATA_VIA=y 505CONFIG_SCSI_SATA_VIA=y
485# CONFIG_SCSI_SATA_VITESSE is not set 506# CONFIG_SCSI_SATA_VITESSE is not set
507CONFIG_SCSI_SATA_INTEL_COMBINED=y
486# CONFIG_SCSI_BUSLOGIC is not set 508# CONFIG_SCSI_BUSLOGIC is not set
487# CONFIG_SCSI_DMX3191D is not set 509# CONFIG_SCSI_DMX3191D is not set
488# CONFIG_SCSI_EATA is not set 510# CONFIG_SCSI_EATA is not set
@@ -525,6 +547,7 @@ CONFIG_BLK_DEV_DM=y
525CONFIG_FUSION=y 547CONFIG_FUSION=y
526CONFIG_FUSION_SPI=y 548CONFIG_FUSION_SPI=y
527# CONFIG_FUSION_FC is not set 549# CONFIG_FUSION_FC is not set
550# CONFIG_FUSION_SAS is not set
528CONFIG_FUSION_MAX_SGE=128 551CONFIG_FUSION_MAX_SGE=128
529# CONFIG_FUSION_CTL is not set 552# CONFIG_FUSION_CTL is not set
530 553
@@ -564,6 +587,7 @@ CONFIG_NET_ETHERNET=y
564CONFIG_MII=y 587CONFIG_MII=y
565# CONFIG_HAPPYMEAL is not set 588# CONFIG_HAPPYMEAL is not set
566# CONFIG_SUNGEM is not set 589# CONFIG_SUNGEM is not set
590# CONFIG_CASSINI is not set
567CONFIG_NET_VENDOR_3COM=y 591CONFIG_NET_VENDOR_3COM=y
568CONFIG_VORTEX=y 592CONFIG_VORTEX=y
569# CONFIG_TYPHOON is not set 593# CONFIG_TYPHOON is not set
@@ -740,7 +764,43 @@ CONFIG_LEGACY_PTY_COUNT=256
740# 764#
741# Watchdog Cards 765# Watchdog Cards
742# 766#
743# CONFIG_WATCHDOG is not set 767CONFIG_WATCHDOG=y
768# CONFIG_WATCHDOG_NOWAYOUT is not set
769
770#
771# Watchdog Device Drivers
772#
773CONFIG_SOFT_WATCHDOG=y
774# CONFIG_ACQUIRE_WDT is not set
775# CONFIG_ADVANTECH_WDT is not set
776# CONFIG_ALIM1535_WDT is not set
777# CONFIG_ALIM7101_WDT is not set
778# CONFIG_SC520_WDT is not set
779# CONFIG_EUROTECH_WDT is not set
780# CONFIG_IB700_WDT is not set
781# CONFIG_IBMASR is not set
782# CONFIG_WAFER_WDT is not set
783# CONFIG_I6300ESB_WDT is not set
784# CONFIG_I8XX_TCO is not set
785# CONFIG_SC1200_WDT is not set
786# CONFIG_60XX_WDT is not set
787# CONFIG_SBC8360_WDT is not set
788# CONFIG_CPU5_WDT is not set
789# CONFIG_W83627HF_WDT is not set
790# CONFIG_W83877F_WDT is not set
791# CONFIG_W83977F_WDT is not set
792# CONFIG_MACHZ_WDT is not set
793
794#
795# PCI-based Watchdog Cards
796#
797# CONFIG_PCIPCWATCHDOG is not set
798# CONFIG_WDTPCI is not set
799
800#
801# USB-based Watchdog Cards
802#
803# CONFIG_USBPCWATCHDOG is not set
744CONFIG_HW_RANDOM=y 804CONFIG_HW_RANDOM=y
745# CONFIG_NVRAM is not set 805# CONFIG_NVRAM is not set
746CONFIG_RTC=y 806CONFIG_RTC=y
@@ -767,6 +827,7 @@ CONFIG_MAX_RAW_DEVS=256
767# TPM devices 827# TPM devices
768# 828#
769# CONFIG_TCG_TPM is not set 829# CONFIG_TCG_TPM is not set
830# CONFIG_TELCLOCK is not set
770 831
771# 832#
772# I2C support 833# I2C support
@@ -783,6 +844,7 @@ CONFIG_MAX_RAW_DEVS=256
783# 844#
784CONFIG_HWMON=y 845CONFIG_HWMON=y
785# CONFIG_HWMON_VID is not set 846# CONFIG_HWMON_VID is not set
847# CONFIG_SENSORS_HDAPS is not set
786# CONFIG_HWMON_DEBUG_CHIP is not set 848# CONFIG_HWMON_DEBUG_CHIP is not set
787 849
788# 850#
@@ -886,12 +948,15 @@ CONFIG_USB_UHCI_HCD=y
886# USB Device Class drivers 948# USB Device Class drivers
887# 949#
888# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set 950# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
889# CONFIG_USB_BLUETOOTH_TTY is not set
890# CONFIG_USB_ACM is not set 951# CONFIG_USB_ACM is not set
891CONFIG_USB_PRINTER=y 952CONFIG_USB_PRINTER=y
892 953
893# 954#
894# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information 955# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
956#
957
958#
959# may also be needed; see USB_STORAGE Help for more information
895# 960#
896CONFIG_USB_STORAGE=y 961CONFIG_USB_STORAGE=y
897# CONFIG_USB_STORAGE_DEBUG is not set 962# CONFIG_USB_STORAGE_DEBUG is not set
@@ -924,6 +989,7 @@ CONFIG_USB_HIDINPUT=y
924# CONFIG_USB_XPAD is not set 989# CONFIG_USB_XPAD is not set
925# CONFIG_USB_ATI_REMOTE is not set 990# CONFIG_USB_ATI_REMOTE is not set
926# CONFIG_USB_KEYSPAN_REMOTE is not set 991# CONFIG_USB_KEYSPAN_REMOTE is not set
992# CONFIG_USB_APPLETOUCH is not set
927 993
928# 994#
929# USB Imaging devices 995# USB Imaging devices
@@ -1005,7 +1071,7 @@ CONFIG_USB_MON=y
1005# 1071#
1006# CONFIG_EDD is not set 1072# CONFIG_EDD is not set
1007# CONFIG_DELL_RBU is not set 1073# CONFIG_DELL_RBU is not set
1008CONFIG_DCDBAS=m 1074# CONFIG_DCDBAS is not set
1009 1075
1010# 1076#
1011# File systems 1077# File systems
@@ -1037,7 +1103,7 @@ CONFIG_INOTIFY=y
1037# CONFIG_QUOTA is not set 1103# CONFIG_QUOTA is not set
1038CONFIG_DNOTIFY=y 1104CONFIG_DNOTIFY=y
1039CONFIG_AUTOFS_FS=y 1105CONFIG_AUTOFS_FS=y
1040# CONFIG_AUTOFS4_FS is not set 1106CONFIG_AUTOFS4_FS=y
1041# CONFIG_FUSE_FS is not set 1107# CONFIG_FUSE_FS is not set
1042 1108
1043# 1109#
@@ -1068,7 +1134,7 @@ CONFIG_TMPFS=y
1068CONFIG_HUGETLBFS=y 1134CONFIG_HUGETLBFS=y
1069CONFIG_HUGETLB_PAGE=y 1135CONFIG_HUGETLB_PAGE=y
1070CONFIG_RAMFS=y 1136CONFIG_RAMFS=y
1071# CONFIG_RELAYFS_FS is not set 1137CONFIG_RELAYFS_FS=y
1072 1138
1073# 1139#
1074# Miscellaneous filesystems 1140# Miscellaneous filesystems
@@ -1186,7 +1252,9 @@ CONFIG_DETECT_SOFTLOCKUP=y
1186# CONFIG_DEBUG_KOBJECT is not set 1252# CONFIG_DEBUG_KOBJECT is not set
1187# CONFIG_DEBUG_INFO is not set 1253# CONFIG_DEBUG_INFO is not set
1188CONFIG_DEBUG_FS=y 1254CONFIG_DEBUG_FS=y
1255# CONFIG_DEBUG_VM is not set
1189# CONFIG_FRAME_POINTER is not set 1256# CONFIG_FRAME_POINTER is not set
1257# CONFIG_RCU_TORTURE_TEST is not set
1190CONFIG_INIT_DEBUG=y 1258CONFIG_INIT_DEBUG=y
1191# CONFIG_IOMMU_DEBUG is not set 1259# CONFIG_IOMMU_DEBUG is not set
1192CONFIG_KPROBES=y 1260CONFIG_KPROBES=y
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 3e6780fa0186..3bf58af98936 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -36,9 +36,6 @@
36#undef WARN_OLD 36#undef WARN_OLD
37#undef CORE_DUMP /* probably broken */ 37#undef CORE_DUMP /* probably broken */
38 38
39extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
40 unsigned long stack_top, int exec_stack);
41
42static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 39static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
43static int load_aout_library(struct file*); 40static int load_aout_library(struct file*);
44 41
@@ -314,7 +311,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
314 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 311 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
315 current->mm->cached_hole_size = 0; 312 current->mm->cached_hole_size = 0;
316 313
317 set_mm_counter(current->mm, rss, 0);
318 current->mm->mmap = NULL; 314 current->mm->mmap = NULL;
319 compute_creds(bprm); 315 compute_creds(bprm);
320 current->flags &= ~PF_FORKNOEXEC; 316 current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index d9161e395978..830feb272eca 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs)
335 me->thread.es = __USER_DS; 335 me->thread.es = __USER_DS;
336} 336}
337 337
338int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) 338int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
339 int executable_stack)
339{ 340{
340 unsigned long stack_base; 341 unsigned long stack_base;
341 struct vm_area_struct *mpnt; 342 struct vm_area_struct *mpnt;
@@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec
389 390
390 return 0; 391 return 0;
391} 392}
393EXPORT_SYMBOL(ia32_setup_arg_pages);
392 394
393static unsigned long 395static unsigned long
394elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) 396elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
index 419758f19ca4..e335bd0b637d 100644
--- a/arch/x86_64/ia32/ia32_ioctl.c
+++ b/arch/x86_64/ia32/ia32_ioctl.c
@@ -12,40 +12,11 @@
12#define INCLUDES 12#define INCLUDES
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include "compat_ioctl.c" 14#include "compat_ioctl.c"
15#include <asm/mtrr.h>
16#include <asm/ia32.h> 15#include <asm/ia32.h>
17 16
18#define CODE 17#define CODE
19#include "compat_ioctl.c" 18#include "compat_ioctl.c"
20 19
21#ifndef TIOCGDEV
22#define TIOCGDEV _IOR('T',0x32, unsigned int)
23#endif
24static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr)
25{
26
27 struct file *file;
28 struct tty_struct *real_tty;
29 int fput_needed, ret;
30
31 file = fget_light(fd, &fput_needed);
32 if (!file)
33 return -EBADF;
34
35 ret = -EINVAL;
36 if (file->f_op->ioctl != tty_ioctl)
37 goto out;
38 real_tty = (struct tty_struct *)file->private_data;
39 if (!real_tty)
40 goto out;
41
42 ret = put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
43
44out:
45 fput_light(file, fput_needed);
46 return ret;
47}
48
49#define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */ 20#define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */
50#define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */ 21#define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */
51#define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned) /* Read epoch */ 22#define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned) /* Read epoch */
@@ -85,90 +56,6 @@ static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
85 return sys_ioctl(fd,cmd,arg); 56 return sys_ioctl(fd,cmd,arg);
86} 57}
87 58
88/* /proc/mtrr ioctls */
89
90
91struct mtrr_sentry32
92{
93 compat_ulong_t base; /* Base address */
94 compat_uint_t size; /* Size of region */
95 compat_uint_t type; /* Type of region */
96};
97
98struct mtrr_gentry32
99{
100 compat_ulong_t regnum; /* Register number */
101 compat_uint_t base; /* Base address */
102 compat_uint_t size; /* Size of region */
103 compat_uint_t type; /* Type of region */
104};
105
106#define MTRR_IOCTL_BASE 'M'
107
108#define MTRRIOC32_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry32)
109#define MTRRIOC32_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry32)
110#define MTRRIOC32_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry32)
111#define MTRRIOC32_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32)
112#define MTRRIOC32_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry32)
113#define MTRRIOC32_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry32)
114#define MTRRIOC32_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry32)
115#define MTRRIOC32_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry32)
116#define MTRRIOC32_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32)
117#define MTRRIOC32_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
118
119
120static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg)
121{
122 struct mtrr_gentry g;
123 struct mtrr_sentry s;
124 int get = 0, err = 0;
125 struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)arg;
126 mm_segment_t oldfs = get_fs();
127
128 switch (cmd) {
129#define SET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; break
130#define GET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; get=1; break
131 SET(ADD);
132 SET(SET);
133 SET(DEL);
134 GET(GET);
135 SET(KILL);
136 SET(ADD_PAGE);
137 SET(SET_PAGE);
138 SET(DEL_PAGE);
139 GET(GET_PAGE);
140 SET(KILL_PAGE);
141 }
142
143 if (get) {
144 err = get_user(g.regnum, &g32->regnum);
145 err |= get_user(g.base, &g32->base);
146 err |= get_user(g.size, &g32->size);
147 err |= get_user(g.type, &g32->type);
148
149 arg = (unsigned long)&g;
150 } else {
151 struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)arg;
152 err = get_user(s.base, &s32->base);
153 err |= get_user(s.size, &s32->size);
154 err |= get_user(s.type, &s32->type);
155
156 arg = (unsigned long)&s;
157 }
158 if (err) return err;
159
160 set_fs(KERNEL_DS);
161 err = sys_ioctl(fd, cmd, arg);
162 set_fs(oldfs);
163
164 if (!err && get) {
165 err = put_user(g.base, &g32->base);
166 err |= put_user(g.size, &g32->size);
167 err |= put_user(g.regnum, &g32->regnum);
168 err |= put_user(g.type, &g32->type);
169 }
170 return err;
171}
172 59
173#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) }, 60#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) },
174#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) 61#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
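The mtrr_ioctl32() handler deleted above followed the standard compat-ioctl thunk pattern: copy each field of the packed 32-bit user layout into the native struct, re-issue the ioctl under KERNEL_DS, and copy results back. A minimal userspace model of the field-copy step (illustrative only — types are abbreviated, and the get_user()/set_fs() machinery is omitted):

```c
#include <stdio.h>
#include <stdint.h>

struct mtrr_sentry32 { uint32_t base, size, type; };      /* compat layout */
struct mtrr_sentry   { unsigned long base, size; unsigned type; }; /* native */

/* equivalent of the get_user() sequence in the removed handler */
static void thunk(const struct mtrr_sentry32 *s32, struct mtrr_sentry *s)
{
	s->base = s32->base;
	s->size = s32->size;
	s->type = s32->type;
}

int main(void)
{
	struct mtrr_sentry32 s32 = { 0x100000, 0x4000, 1 };
	struct mtrr_sentry s;

	thunk(&s32, &s);
	printf("base=%#lx size=%#lx type=%u\n", s.base, s.size, s.type);
	return 0;
}
```

This commit drops the arch-local copy because the translation now lives in generic code, which is why the HANDLE_IOCTL entries disappear further down.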
@@ -177,15 +64,8 @@ struct ioctl_trans ioctl_start[] = {
177#include <linux/compat_ioctl.h> 64#include <linux/compat_ioctl.h>
178#define DECLARES 65#define DECLARES
179#include "compat_ioctl.c" 66#include "compat_ioctl.c"
180COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS)
181COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
182COMPATIBLE_IOCTL(BLKRASET)
183COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
184COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
185COMPATIBLE_IOCTL(FIOQSIZE)
186 67
187/* And these ioctls need translation */ 68/* And these ioctls need translation */
188HANDLE_IOCTL(TIOCGDEV, tiocgdev)
189/* realtime device */ 69/* realtime device */
190HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl) 70HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl)
191HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl) 71HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl)
@@ -193,17 +73,6 @@ HANDLE_IOCTL(RTC_IRQP_SET32, rtc32_ioctl)
193HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl) 73HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl)
194HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl) 74HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl)
195/* take care of sizeof(sizeof()) breakage */ 75/* take care of sizeof(sizeof()) breakage */
196/* mtrr */
197HANDLE_IOCTL(MTRRIOC32_ADD_ENTRY, mtrr_ioctl32)
198HANDLE_IOCTL(MTRRIOC32_SET_ENTRY, mtrr_ioctl32)
199HANDLE_IOCTL(MTRRIOC32_DEL_ENTRY, mtrr_ioctl32)
200HANDLE_IOCTL(MTRRIOC32_GET_ENTRY, mtrr_ioctl32)
201HANDLE_IOCTL(MTRRIOC32_KILL_ENTRY, mtrr_ioctl32)
202HANDLE_IOCTL(MTRRIOC32_ADD_PAGE_ENTRY, mtrr_ioctl32)
203HANDLE_IOCTL(MTRRIOC32_SET_PAGE_ENTRY, mtrr_ioctl32)
204HANDLE_IOCTL(MTRRIOC32_DEL_PAGE_ENTRY, mtrr_ioctl32)
205HANDLE_IOCTL(MTRRIOC32_GET_PAGE_ENTRY, mtrr_ioctl32)
206HANDLE_IOCTL(MTRRIOC32_KILL_PAGE_ENTRY, mtrr_ioctl32)
207}; 76};
208 77
209int ioctl_table_size = ARRAY_SIZE(ioctl_start); 78int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 66e2821533db..0903cc1faef2 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -425,7 +425,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
425 rsp = (unsigned long) ka->sa.sa_restorer; 425 rsp = (unsigned long) ka->sa.sa_restorer;
426 } 426 }
427 427
428 return (void __user *)((rsp - frame_size) & -8UL); 428 rsp -= frame_size;
429 /* Align the stack pointer according to the i386 ABI,
430 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
431 rsp = ((rsp + 4) & -16ul) - 4;
432 return (void __user *) rsp;
429} 433}
430 434
431int ia32_setup_frame(int sig, struct k_sigaction *ka, 435int ia32_setup_frame(int sig, struct k_sigaction *ka,
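The new alignment step in get_sigframe() enforces the i386 ABI rule stated in the comment: on function entry, after the return address has been pushed, ((sp + 4) & 15) == 0. A quick userspace check of the arithmetic (illustrative; the starting value and frame size are arbitrary):

```c
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long rsp = 0xffffe123UL; /* arbitrary starting stack pointer */
	unsigned long frame_size = 0x188; /* hypothetical sigframe size */

	rsp -= frame_size;
	rsp = ((rsp + 4) & -16UL) - 4;    /* the new alignment step */

	assert(((rsp + 4) & 15) == 0);    /* the ABI invariant from the comment */
	printf("aligned rsp = %#lx\n", rsp);
	return 0;
}
```

The old mask of -8UL only guaranteed 8-byte alignment, which breaks 32-bit code compiled to assume SSE-compatible 16-byte stack alignment in signal handlers.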
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bcdd0a805fe7..fe4cbd1c4b2f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
11 11
12obj-$(CONFIG_X86_MCE) += mce.o 12obj-$(CONFIG_X86_MCE) += mce.o
13obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 13obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
14obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
14obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ 15obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
15obj-$(CONFIG_ACPI) += acpi/ 16obj-$(CONFIG_ACPI) += acpi/
16obj-$(CONFIG_X86_MSR) += msr.o 17obj-$(CONFIG_X86_MSR) += msr.o
@@ -27,7 +28,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
27obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 28obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
28obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o 29obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
29obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 30obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
30obj-$(CONFIG_SWIOTLB) += swiotlb.o
31obj-$(CONFIG_KPROBES) += kprobes.o 31obj-$(CONFIG_KPROBES) += kprobes.o
32obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 32obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
33 33
@@ -41,7 +41,6 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0
41bootflag-y += ../../i386/kernel/bootflag.o 41bootflag-y += ../../i386/kernel/bootflag.o
42cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o 42cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
43topology-y += ../../i386/mach-default/topology.o 43topology-y += ../../i386/mach-default/topology.o
44swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o
45microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o 44microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
46intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o 45intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
47quirks-y += ../../i386/kernel/quirks.o 46quirks-y += ../../i386/kernel/quirks.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 962ad4823b6a..c7f4fdd20f05 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
196void __init iommu_hole_init(void) 196void __init iommu_hole_init(void)
197{ 197{
198 int fix, num; 198 int fix, num;
199 u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0; 199 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
200 u64 aper_base, last_aper_base = 0; 200 u64 aper_base, last_aper_base = 0;
201 int valid_agp = 0; 201 int valid_agp = 0;
202 202
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index b6e7715d877f..18691ce4c759 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
833 return 0; 833 return 0;
834} 834}
835 835
836#ifdef CONFIG_X86_MCE_AMD
837void setup_threshold_lvt(unsigned long lvt_off)
838{
839 unsigned int v = 0;
840 unsigned long reg = (lvt_off << 4) + 0x500;
841 v |= THRESHOLD_APIC_VECTOR;
842 apic_write(reg, v);
843}
844#endif /* CONFIG_X86_MCE_AMD */
845
836#undef APIC_DIVISOR 846#undef APIC_DIVISOR
837 847
838/* 848/*
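The register arithmetic in the new setup_threshold_lvt() maps an LVT bank number to an APIC register offset: the threshold LVT entries start at offset 0x500 and are spaced 16 bytes apart, hence the << 4. A small demonstration of the mapping (illustrative only; 0x500 and the stride come straight from the hunk above):

```c
#include <stdio.h>

int main(void)
{
	for (unsigned long lvt_off = 0; lvt_off < 4; lvt_off++)
		printf("threshold bank %lu -> APIC reg %#lx\n",
		       lvt_off, (lvt_off << 4) + 0x500UL);
	return 0;
}
```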
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index ab3f87aaff70..17579a1a174b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -23,8 +23,7 @@
23#include <asm/e820.h> 23#include <asm/e820.h>
24#include <asm/proto.h> 24#include <asm/proto.h>
25#include <asm/bootsetup.h> 25#include <asm/bootsetup.h>
26 26#include <asm/sections.h>
27extern char _end[];
28 27
29/* 28/*
30 * PFN of last memory page. 29 * PFN of last memory page.
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7937971d1853..9ff42041bb6b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -612,6 +612,9 @@ retint_kernel:
612ENTRY(thermal_interrupt) 612ENTRY(thermal_interrupt)
613 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 613 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
614 614
615ENTRY(threshold_interrupt)
616 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
617
615#ifdef CONFIG_SMP 618#ifdef CONFIG_SMP
616ENTRY(reschedule_interrupt) 619ENTRY(reschedule_interrupt)
617 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 620 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 4592bf21fcaf..15290968e49d 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -12,6 +12,7 @@
12 12
13#include <linux/linkage.h> 13#include <linux/linkage.h>
14#include <linux/threads.h> 14#include <linux/threads.h>
15#include <linux/init.h>
15#include <asm/desc.h> 16#include <asm/desc.h>
16#include <asm/segment.h> 17#include <asm/segment.h>
17#include <asm/page.h> 18#include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
70 movl %eax, %cr4 71 movl %eax, %cr4
71 72
72 /* Setup early boot stage 4 level pagetables */ 73 /* Setup early boot stage 4 level pagetables */
73 movl $(init_level4_pgt - __START_KERNEL_map), %eax 74 movl $(boot_level4_pgt - __START_KERNEL_map), %eax
74 movl %eax, %cr3 75 movl %eax, %cr3
75 76
76 /* Setup EFER (Extended Feature Enable Register) */ 77 /* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
113 movq %rax, %cr4 114 movq %rax, %cr4
114 115
115 /* Setup early boot stage 4 level pagetables. */ 116 /* Setup early boot stage 4 level pagetables. */
116 movq $(init_level4_pgt - __START_KERNEL_map), %rax 117 movq $(boot_level4_pgt - __START_KERNEL_map), %rax
117 movq %rax, %cr3 118 movq %rax, %cr3
118 119
119 /* Check if nx is implemented */ 120 /* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
240ENTRY(stext) 241ENTRY(stext)
241ENTRY(_stext) 242ENTRY(_stext)
242 243
243 /*
244 * This default setting generates an ident mapping at address 0x100000
245 * and a mapping for the kernel that precisely maps virtual address
246 * 0xffffffff80000000 to physical address 0x000000. (always using
247 * 2Mbyte large pages provided by PAE mode)
248 */
249.org 0x1000 244.org 0x1000
250ENTRY(init_level4_pgt) 245ENTRY(init_level4_pgt)
251 .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 246 /* This gets initialized in x86_64_start_kernel */
252 .fill 255,8,0 247 .fill 512,8,0
253 .quad 0x000000000000a007 + __PHYSICAL_START
254 .fill 254,8,0
255 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
256 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
257 248
258.org 0x2000 249.org 0x2000
259ENTRY(level3_ident_pgt) 250ENTRY(level3_ident_pgt)
@@ -270,26 +261,26 @@ ENTRY(level3_kernel_pgt)
270.org 0x4000 261.org 0x4000
271ENTRY(level2_ident_pgt) 262ENTRY(level2_ident_pgt)
272 /* 40MB for bootup. */ 263 /* 40MB for bootup. */
273 .quad 0x0000000000000183 264 .quad 0x0000000000000083
274 .quad 0x0000000000200183 265 .quad 0x0000000000200083
275 .quad 0x0000000000400183 266 .quad 0x0000000000400083
276 .quad 0x0000000000600183 267 .quad 0x0000000000600083
277 .quad 0x0000000000800183 268 .quad 0x0000000000800083
278 .quad 0x0000000000A00183 269 .quad 0x0000000000A00083
279 .quad 0x0000000000C00183 270 .quad 0x0000000000C00083
280 .quad 0x0000000000E00183 271 .quad 0x0000000000E00083
281 .quad 0x0000000001000183 272 .quad 0x0000000001000083
282 .quad 0x0000000001200183 273 .quad 0x0000000001200083
283 .quad 0x0000000001400183 274 .quad 0x0000000001400083
284 .quad 0x0000000001600183 275 .quad 0x0000000001600083
285 .quad 0x0000000001800183 276 .quad 0x0000000001800083
286 .quad 0x0000000001A00183 277 .quad 0x0000000001A00083
287 .quad 0x0000000001C00183 278 .quad 0x0000000001C00083
288 .quad 0x0000000001E00183 279 .quad 0x0000000001E00083
289 .quad 0x0000000002000183 280 .quad 0x0000000002000083
290 .quad 0x0000000002200183 281 .quad 0x0000000002200083
291 .quad 0x0000000002400183 282 .quad 0x0000000002400083
292 .quad 0x0000000002600183 283 .quad 0x0000000002600083
293 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ 284 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
294 .globl temp_boot_pmds 285 .globl temp_boot_pmds
295temp_boot_pmds: 286temp_boot_pmds:
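The level2_ident_pgt change above flips the PMD flags from 0x183 to 0x083, which clears exactly one bit: bit 8, the Global flag. A sketch decoding the two values (the bit positions are the architectural x86-64 page-table flags):

```c
#include <stdio.h>

#define PTE_P  (1UL << 0) /* present */
#define PTE_RW (1UL << 1) /* writable */
#define PTE_PS (1UL << 7) /* 2MB large page */
#define PTE_G  (1UL << 8) /* global: TLB entry survives CR3 reloads */

static void decode(unsigned long flags)
{
	printf("%#05lx: P=%d RW=%d PS=%d G=%d\n", flags,
	       !!(flags & PTE_P), !!(flags & PTE_RW),
	       !!(flags & PTE_PS), !!(flags & PTE_G));
}

int main(void)
{
	decode(0x183); /* old: global identity mapping */
	decode(0x083); /* new: same mapping, not global */
	return 0;
}
```

Dropping the Global bit plausibly matters because head64.c now reloads CR3 to switch from boot_level4_pgt to init_level4_pgt: global TLB entries would survive that reload, keeping stale early mappings alive.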
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
350 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 341 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
351#endif 342#endif
352 343
344#ifndef CONFIG_HOTPLUG_CPU
345 __INITDATA
346#endif
347 /*
348 * This default setting generates an ident mapping at address 0x100000
349 * and a mapping for the kernel that precisely maps virtual address
350 * 0xffffffff80000000 to physical address 0x000000. (always using
351 * 2Mbyte large pages provided by PAE mode)
352 */
353 .align PAGE_SIZE
354ENTRY(boot_level4_pgt)
355 .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
356 .fill 255,8,0
357 .quad 0x000000000000a007 + __PHYSICAL_START
358 .fill 254,8,0
359 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
360 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
361
353 .data 362 .data
354 363
355 .align 16 364 .align 16
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cf6ab147a2a5..b675c5add01e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -19,14 +19,15 @@
19#include <asm/bootsetup.h> 19#include <asm/bootsetup.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/desc.h> 21#include <asm/desc.h>
22#include <asm/pgtable.h>
23#include <asm/sections.h>
22 24
23/* Don't add a printk in there. printk relies on the PDA which is not initialized 25/* Don't add a printk in there. printk relies on the PDA which is not initialized
24 yet. */ 26 yet. */
25static void __init clear_bss(void) 27static void __init clear_bss(void)
26{ 28{
27 extern char __bss_start[], __bss_end[];
28 memset(__bss_start, 0, 29 memset(__bss_start, 0,
29 (unsigned long) __bss_end - (unsigned long) __bss_start); 30 (unsigned long) __bss_stop - (unsigned long) __bss_start);
30} 31}
31 32
32#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ 33#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
75 boot_cpu_data.x86_mask = eax & 0xf; 76 boot_cpu_data.x86_mask = eax & 0xf;
76} 77}
77 78
78extern char _end[];
79
80void __init x86_64_start_kernel(char * real_mode_data) 79void __init x86_64_start_kernel(char * real_mode_data)
81{ 80{
82 char *s; 81 char *s;
@@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
86 set_intr_gate(i, early_idt_handler); 85 set_intr_gate(i, early_idt_handler);
87 asm volatile("lidt %0" :: "m" (idt_descr)); 86 asm volatile("lidt %0" :: "m" (idt_descr));
88 clear_bss(); 87 clear_bss();
88
89 /*
90 * switch to init_level4_pgt from boot_level4_pgt
91 */
92 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
93 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
94
89 pda_init(0); 95 pda_init(0);
90 copy_bootdata(real_mode_data); 96 copy_bootdata(real_mode_data);
91#ifdef CONFIG_SMP 97#ifdef CONFIG_SMP
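The memcpy in the new x86_64_start_kernel() copies the entire top-level page table — 512 eight-byte entries, i.e. exactly one 4KB page — before pointing CR3 at init_level4_pgt. A trivial size check (illustrative; pgd_t is modeled here as a plain unsigned long rather than the kernel's wrapper struct):

```c
#include <stdio.h>

#define PTRS_PER_PGD 512
typedef unsigned long pgd_t; /* stand-in for the kernel type */

int main(void)
{
	/* the memcpy above copies one full 4KB page of PGD entries */
	printf("copy size = %zu bytes\n", PTRS_PER_PGD * sizeof(pgd_t));
	return 0;
}
```

This handoff is what lets boot_level4_pgt live in __INITDATA when CPU hotplug is disabled, as set up in head.S above.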
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index b2a238b5a17e..6e5101ad3d1a 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -492,9 +492,10 @@ void invalidate_interrupt5(void);
492void invalidate_interrupt6(void); 492void invalidate_interrupt6(void);
493void invalidate_interrupt7(void); 493void invalidate_interrupt7(void);
494void thermal_interrupt(void); 494void thermal_interrupt(void);
495void threshold_interrupt(void);
495void i8254_timer_resume(void); 496void i8254_timer_resume(void);
496 497
497static void setup_timer(void) 498static void setup_timer_hardware(void)
498{ 499{
499 outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ 500 outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
500 udelay(10); 501 udelay(10);
@@ -505,17 +506,17 @@ static void setup_timer(void)
505 506
506static int timer_resume(struct sys_device *dev) 507static int timer_resume(struct sys_device *dev)
507{ 508{
508 setup_timer(); 509 setup_timer_hardware();
509 return 0; 510 return 0;
510} 511}
511 512
512void i8254_timer_resume(void) 513void i8254_timer_resume(void)
513{ 514{
514 setup_timer(); 515 setup_timer_hardware();
515} 516}
516 517
517static struct sysdev_class timer_sysclass = { 518static struct sysdev_class timer_sysclass = {
518 set_kset_name("timer"), 519 set_kset_name("timer_pit"),
519 .resume = timer_resume, 520 .resume = timer_resume,
520}; 521};
521 522
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
580 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 581 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
581#endif 582#endif
582 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 583 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
584 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
583 585
584#ifdef CONFIG_X86_LOCAL_APIC 586#ifdef CONFIG_X86_LOCAL_APIC
585 /* self generated IPI for local APIC timer */ 587 /* self generated IPI for local APIC timer */
@@ -594,7 +596,7 @@ void __init init_IRQ(void)
594 * Set the clock to HZ Hz, we already have a valid 596 * Set the clock to HZ Hz, we already have a valid
595 * vector now: 597 * vector now:
596 */ 598 */
597 setup_timer(); 599 setup_timer_hardware();
598 600
599 if (!acpi_ioapic) 601 if (!acpi_ioapic)
600 setup_irq(2, &irq2); 602 setup_irq(2, &irq2);
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
57 * Rough estimation of how many shared IRQs there are, can 57 * Rough estimation of how many shared IRQs there are, can
58 * be changed anytime. 58 * be changed anytime.
59 */ 59 */
60#define MAX_PLUS_SHARED_IRQS NR_IRQS 60#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
61#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 61#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
62 62
63/* 63/*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
85 int pin; \ 85 int pin; \
86 struct irq_pin_list *entry = irq_2_pin + irq; \ 86 struct irq_pin_list *entry = irq_2_pin + irq; \
87 \ 87 \
88 BUG_ON(irq >= NR_IRQS); \
88 for (;;) { \ 89 for (;;) { \
89 unsigned int reg; \ 90 unsigned int reg; \
90 pin = entry->pin; \ 91 pin = entry->pin; \
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
127} 128}
128#endif 129#endif
129 130
131static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
132
130/* 133/*
131 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 134 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
132 * shared ISA-space IRQs, so we have to support them. We are super 135 * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
137 static int first_free_entry = NR_IRQS; 140 static int first_free_entry = NR_IRQS;
138 struct irq_pin_list *entry = irq_2_pin + irq; 141 struct irq_pin_list *entry = irq_2_pin + irq;
139 142
143 BUG_ON(irq >= NR_IRQS);
140 while (entry->next) 144 while (entry->next)
141 entry = irq_2_pin + entry->next; 145 entry = irq_2_pin + entry->next;
142 146
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
144 entry->next = first_free_entry; 148 entry->next = first_free_entry;
145 entry = irq_2_pin + entry->next; 149 entry = irq_2_pin + entry->next;
146 if (++first_free_entry >= PIN_MAP_SIZE) 150 if (++first_free_entry >= PIN_MAP_SIZE)
147 panic("io_apic.c: whoops"); 151 panic("io_apic.c: ran out of irq_2_pin entries!");
148 } 152 }
149 entry->apic = apic; 153 entry->apic = apic;
150 entry->pin = pin; 154 entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
420 best_guess = irq; 424 best_guess = irq;
421 } 425 }
422 } 426 }
427 BUG_ON(best_guess >= NR_IRQS);
423 return best_guess; 428 return best_guess;
424} 429}
425 430
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
610 return MPBIOS_trigger(idx); 615 return MPBIOS_trigger(idx);
611} 616}
612 617
618static int next_irq = 16;
619
620/*
621 * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
622 * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
623 * from ACPI, which can reach 800 in large boxen.
624 *
625 * Compact the sparse GSI space into a sequential IRQ series and reuse
626 * vectors if possible.
627 */
628int gsi_irq_sharing(int gsi)
629{
630 int i, tries, vector;
631
632 BUG_ON(gsi >= NR_IRQ_VECTORS);
633
634 if (platform_legacy_irq(gsi))
635 return gsi;
636
637 if (gsi_2_irq[gsi] != 0xFF)
638 return (int)gsi_2_irq[gsi];
639
640 tries = NR_IRQS;
641 try_again:
642 vector = assign_irq_vector(gsi);
643
644 /*
645 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
646 * use of vector and if found, return that IRQ. However, we never want
647 * to share legacy IRQs, which usually have a different trigger mode
648 * than PCI.
649 */
650 for (i = 0; i < NR_IRQS; i++)
651 if (IO_APIC_VECTOR(i) == vector)
652 break;
653 if (platform_legacy_irq(i)) {
654 if (--tries >= 0) {
655 IO_APIC_VECTOR(i) = 0;
656 goto try_again;
657 }
658 panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
659 }
660 if (i < NR_IRQS) {
661 gsi_2_irq[gsi] = i;
662 printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
663 gsi, vector, i);
664 return i;
665 }
666
667 i = next_irq++;
668 BUG_ON(i >= NR_IRQS);
669 gsi_2_irq[gsi] = i;
670 IO_APIC_VECTOR(i) = vector;
671 printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
672 gsi, vector, i);
673 return i;
674}
675
613static int pin_2_irq(int idx, int apic, int pin) 676static int pin_2_irq(int idx, int apic, int pin)
614{ 677{
615 int irq, i; 678 int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
639 while (i < apic) 702 while (i < apic)
640 irq += nr_ioapic_registers[i++]; 703 irq += nr_ioapic_registers[i++];
641 irq += pin; 704 irq += pin;
705 irq = gsi_irq_sharing(irq);
642 break; 706 break;
643 } 707 }
644 default: 708 default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
648 break; 712 break;
649 } 713 }
650 } 714 }
715 BUG_ON(irq >= NR_IRQS);
651 716
652 /* 717 /*
653 * PCI IRQ command line redirection. Yes, limits are hardcoded. 718 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
663 } 728 }
664 } 729 }
665 } 730 }
731 BUG_ON(irq >= NR_IRQS);
666 return irq; 732 return irq;
667} 733}
668 734
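The comment block on gsi_irq_sharing() above describes the compaction scheme; a toy userspace model of the gsi_2_irq table may make the behavior concrete (illustrative only — vector assignment and sharing are omitted, and the constants are assumptions, not the kernel's):

```c
#include <stdio.h>

#define NR_GSIS    1024
#define UNASSIGNED 0xFF

static unsigned char gsi_2_irq[NR_GSIS];
static int next_irq = 16;

static int map_gsi(int gsi)
{
	if (gsi < 16)                    /* platform_legacy_irq(): 1:1 */
		return gsi;
	if (gsi_2_irq[gsi] != UNASSIGNED)
		return gsi_2_irq[gsi];   /* cached earlier assignment */
	gsi_2_irq[gsi] = next_irq;       /* compact into the next free IRQ */
	return next_irq++;
}

int main(void)
{
	for (int i = 0; i < NR_GSIS; i++)
		gsi_2_irq[i] = UNASSIGNED;

	/* sparse ACPI GSIs compact into a sequential linux IRQ series */
	printf("GSI 5   -> IRQ %d\n", map_gsi(5));   /* legacy: 5 */
	printf("GSI 400 -> IRQ %d\n", map_gsi(400)); /* 16 */
	printf("GSI 801 -> IRQ %d\n", map_gsi(801)); /* 17 */
	printf("GSI 400 -> IRQ %d\n", map_gsi(400)); /* cached: 16 */
	return 0;
}
```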
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
690{ 756{
691 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 757 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
692 758
693 BUG_ON(irq >= NR_IRQ_VECTORS); 759 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
694 if (IO_APIC_VECTOR(irq) > 0) 760 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
695 return IO_APIC_VECTOR(irq); 761 return IO_APIC_VECTOR(irq);
696next: 762next:
697 current_vector += 8; 763 current_vector += 8;
@@ -699,9 +765,8 @@ next:
699 goto next; 765 goto next;
700 766
701 if (current_vector >= FIRST_SYSTEM_VECTOR) { 767 if (current_vector >= FIRST_SYSTEM_VECTOR) {
702 offset++; 768 /* If we run out of vectors on large boxen, must share them. */
703 if (!(offset%8)) 769 offset = (offset + 1) % 8;
704 return -ENOSPC;
705 current_vector = FIRST_DEVICE_VECTOR + offset; 770 current_vector = FIRST_DEVICE_VECTOR + offset;
706 } 771 }
707 772
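The hunk above also changes assign_irq_vector()'s exhaustion behavior: instead of returning -ENOSPC once the offset has wrapped, the offset now cycles modulo 8 and vectors begin to be reused (shared) on large systems. A userspace sketch of the walk (the vector constants are assumptions for illustration; the real code additionally skips the 0x80 syscall vector):

```c
#include <stdio.h>

#define FIRST_DEVICE_VECTOR 0x31 /* assumed for the sketch */
#define FIRST_SYSTEM_VECTOR 0xef /* assumed for the sketch */

int main(void)
{
	int current_vector = FIRST_DEVICE_VECTOR, offset = 0;

	for (int i = 0; i < 200; i++) {
		/* step by 8 so consecutive IRQs land in different
		 * APIC priority classes */
		current_vector += 8;
		if (current_vector >= FIRST_SYSTEM_VECTOR) {
			/* old code: -ENOSPC after 8 wraps; new code: share */
			offset = (offset + 1) % 8;
			current_vector = FIRST_DEVICE_VECTOR + offset;
		}
	}
	printf("after 200 allocations, next vector would be 0x%02x\n",
	       current_vector);
	return 0;
}
```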
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
1917 entry.polarity = active_high_low; 1982 entry.polarity = active_high_low;
1918 entry.mask = 1; /* Disabled (masked) */ 1983 entry.mask = 1; /* Disabled (masked) */
1919 1984
1985 irq = gsi_irq_sharing(irq);
1920 /* 1986 /*
1921 * IRQs < 16 are already in the irq_2_pin[] map 1987 * IRQs < 16 are already in the irq_2_pin[] map
1922 */ 1988 */
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index df08c43276a0..dddeb678b440 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -34,7 +34,6 @@
34#include <linux/config.h> 34#include <linux/config.h>
35#include <linux/kprobes.h> 35#include <linux/kprobes.h>
36#include <linux/ptrace.h> 36#include <linux/ptrace.h>
37#include <linux/spinlock.h>
38#include <linux/string.h> 37#include <linux/string.h>
39#include <linux/slab.h> 38#include <linux/slab.h>
40#include <linux/preempt.h> 39#include <linux/preempt.h>
@@ -44,17 +43,10 @@
44#include <asm/kdebug.h> 43#include <asm/kdebug.h>
45 44
46static DECLARE_MUTEX(kprobe_mutex); 45static DECLARE_MUTEX(kprobe_mutex);
47
48static struct kprobe *current_kprobe;
49static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags;
50static struct kprobe *kprobe_prev;
51static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
52static struct pt_regs jprobe_saved_regs;
53static long *jprobe_saved_rsp;
54void jprobe_return_end(void); 46void jprobe_return_end(void);
55 47
56/* copy of the kernel stack at the probe fire time */ 48DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
57static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; 49DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
58 50
59/* 51/*
60 * returns non-zero if opcode modifies the interrupt flag. 52 * returns non-zero if opcode modifies the interrupt flag.
@@ -77,9 +69,9 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn)
77int __kprobes arch_prepare_kprobe(struct kprobe *p) 69int __kprobes arch_prepare_kprobe(struct kprobe *p)
78{ 70{
79 /* insn: must be on special executable page on x86_64. */ 71 /* insn: must be on special executable page on x86_64. */
80 up(&kprobe_mutex);
81 p->ainsn.insn = get_insn_slot();
82 down(&kprobe_mutex); 72 down(&kprobe_mutex);
73 p->ainsn.insn = get_insn_slot();
74 up(&kprobe_mutex);
83 if (!p->ainsn.insn) { 75 if (!p->ainsn.insn) {
84 return -ENOMEM; 76 return -ENOMEM;
85 } 77 }
@@ -231,34 +223,35 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
231 223
232void __kprobes arch_remove_kprobe(struct kprobe *p) 224void __kprobes arch_remove_kprobe(struct kprobe *p)
233{ 225{
234 up(&kprobe_mutex);
235 free_insn_slot(p->ainsn.insn);
236 down(&kprobe_mutex); 226 down(&kprobe_mutex);
227 free_insn_slot(p->ainsn.insn);
228 up(&kprobe_mutex);
237} 229}
238 230
239static inline void save_previous_kprobe(void) 231static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
240{ 232{
241 kprobe_prev = current_kprobe; 233 kcb->prev_kprobe.kp = kprobe_running();
242 kprobe_status_prev = kprobe_status; 234 kcb->prev_kprobe.status = kcb->kprobe_status;
243 kprobe_old_rflags_prev = kprobe_old_rflags; 235 kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
244 kprobe_saved_rflags_prev = kprobe_saved_rflags; 236 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
245} 237}
246 238
247static inline void restore_previous_kprobe(void) 239static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
248{ 240{
249 current_kprobe = kprobe_prev; 241 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
250 kprobe_status = kprobe_status_prev; 242 kcb->kprobe_status = kcb->prev_kprobe.status;
251 kprobe_old_rflags = kprobe_old_rflags_prev; 243 kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
252 kprobe_saved_rflags = kprobe_saved_rflags_prev; 244 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
253} 245}
254 246
255static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) 247static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
248 struct kprobe_ctlblk *kcb)
256{ 249{
257 current_kprobe = p; 250 __get_cpu_var(current_kprobe) = p;
258 kprobe_saved_rflags = kprobe_old_rflags 251 kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
259 = (regs->eflags & (TF_MASK | IF_MASK)); 252 = (regs->eflags & (TF_MASK | IF_MASK));
260 if (is_IF_modifier(p->ainsn.insn)) 253 if (is_IF_modifier(p->ainsn.insn))
261 kprobe_saved_rflags &= ~IF_MASK; 254 kcb->kprobe_saved_rflags &= ~IF_MASK;
262} 255}
263 256
264static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 257static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
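The pattern running through this kprobes diff is the replacement of module-global probe state (current_kprobe, kprobe_status, the *_prev shadow copies) with per-CPU storage via DEFINE_PER_CPU, so that simultaneous probe hits on different CPUs keep independent state. A minimal userspace analogue, modeling per-CPU data as an array indexed by CPU id (illustrative only):

```c
#include <stdio.h>

#define NR_CPUS 4

struct kprobe_ctlblk {
	unsigned long kprobe_status;
	unsigned long kprobe_saved_rflags;
};

/* stand-in for DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk) */
static struct kprobe_ctlblk ctlblk[NR_CPUS];

static struct kprobe_ctlblk *get_kprobe_ctlblk(int cpu)
{
	/* the kernel version implicitly uses the current CPU */
	return &ctlblk[cpu];
}

int main(void)
{
	/* hits on different CPUs no longer clobber each other's state */
	get_kprobe_ctlblk(0)->kprobe_status = 1;
	get_kprobe_ctlblk(1)->kprobe_status = 2;
	printf("cpu0=%lu cpu1=%lu\n",
	       ctlblk[0].kprobe_status, ctlblk[1].kprobe_status);
	return 0;
}
```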
@@ -272,6 +265,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
272 regs->rip = (unsigned long)p->ainsn.insn; 265 regs->rip = (unsigned long)p->ainsn.insn;
273} 266}
274 267
268/* Called with kretprobe_lock held */
275void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, 269void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
276 struct pt_regs *regs) 270 struct pt_regs *regs)
277{ 271{
@@ -292,32 +286,30 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
292 } 286 }
293} 287}
294 288
295/*
296 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
297 * remain disabled thorough out this function.
298 */
299int __kprobes kprobe_handler(struct pt_regs *regs) 289int __kprobes kprobe_handler(struct pt_regs *regs)
300{ 290{
301 struct kprobe *p; 291 struct kprobe *p;
302 int ret = 0; 292 int ret = 0;
303 kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); 293 kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
294 struct kprobe_ctlblk *kcb;
304 295
305 /* We're in an interrupt, but this is clear and BUG()-safe. */ 296 /*
297 * We don't want to be preempted for the entire
298 * duration of kprobe processing
299 */
306 preempt_disable(); 300 preempt_disable();
301 kcb = get_kprobe_ctlblk();
307 302
308 /* Check we're not actually recursing */ 303 /* Check we're not actually recursing */
309 if (kprobe_running()) { 304 if (kprobe_running()) {
310 /* We *are* holding lock here, so this is safe.
311 Disarm the probe we just hit, and ignore it. */
312 p = get_kprobe(addr); 305 p = get_kprobe(addr);
313 if (p) { 306 if (p) {
314 if (kprobe_status == KPROBE_HIT_SS && 307 if (kcb->kprobe_status == KPROBE_HIT_SS &&
315 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { 308 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
316 regs->eflags &= ~TF_MASK; 309 regs->eflags &= ~TF_MASK;
317 regs->eflags |= kprobe_saved_rflags; 310 regs->eflags |= kcb->kprobe_saved_rflags;
318 unlock_kprobes();
319 goto no_kprobe; 311 goto no_kprobe;
320 } else if (kprobe_status == KPROBE_HIT_SSDONE) { 312 } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
321 /* TODO: Provide re-entrancy from 313 /* TODO: Provide re-entrancy from
322 * post_kprobes_handler() and avoid exception 314 * post_kprobes_handler() and avoid exception
323 * stack corruption while single-stepping on 315 * stack corruption while single-stepping on
@@ -325,6 +317,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
325 */ 317 */
326 arch_disarm_kprobe(p); 318 arch_disarm_kprobe(p);
327 regs->rip = (unsigned long)p->addr; 319 regs->rip = (unsigned long)p->addr;
320 reset_current_kprobe();
328 ret = 1; 321 ret = 1;
329 } else { 322 } else {
330 /* We have reentered the kprobe_handler(), since 323 /* We have reentered the kprobe_handler(), since
@@ -334,27 +327,24 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
334 * of the new probe without calling any user 327 * of the new probe without calling any user
335 * handlers. 328 * handlers.
336 */ 329 */
337 save_previous_kprobe(); 330 save_previous_kprobe(kcb);
338 set_current_kprobe(p, regs); 331 set_current_kprobe(p, regs, kcb);
339 p->nmissed++; 332 p->nmissed++;
340 prepare_singlestep(p, regs); 333 prepare_singlestep(p, regs);
341 kprobe_status = KPROBE_REENTER; 334 kcb->kprobe_status = KPROBE_REENTER;
342 return 1; 335 return 1;
343 } 336 }
344 } else { 337 } else {
345 p = current_kprobe; 338 p = __get_cpu_var(current_kprobe);
346 if (p->break_handler && p->break_handler(p, regs)) { 339 if (p->break_handler && p->break_handler(p, regs)) {
347 goto ss_probe; 340 goto ss_probe;
348 } 341 }
349 } 342 }
350 /* If it's not ours, it can't be a delete race (we hold the lock). */
351 goto no_kprobe; 343 goto no_kprobe;
352 } 344 }
353 345
354 lock_kprobes();
355 p = get_kprobe(addr); 346 p = get_kprobe(addr);
356 if (!p) { 347 if (!p) {
357 unlock_kprobes();
358 if (*addr != BREAKPOINT_INSTRUCTION) { 348 if (*addr != BREAKPOINT_INSTRUCTION) {
359 /* 349 /*
360 * The breakpoint instruction was removed right 350 * The breakpoint instruction was removed right
@@ -372,8 +362,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
372 goto no_kprobe; 362 goto no_kprobe;
373 } 363 }
374 364
375 kprobe_status = KPROBE_HIT_ACTIVE; 365 set_current_kprobe(p, regs, kcb);
376 set_current_kprobe(p, regs); 366 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
377 367
378 if (p->pre_handler && p->pre_handler(p, regs)) 368 if (p->pre_handler && p->pre_handler(p, regs))
379 /* handler has already set things up, so skip ss setup */ 369 /* handler has already set things up, so skip ss setup */
@@ -381,7 +371,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
381 371
382ss_probe: 372ss_probe:
383 prepare_singlestep(p, regs); 373 prepare_singlestep(p, regs);
384 kprobe_status = KPROBE_HIT_SS; 374 kcb->kprobe_status = KPROBE_HIT_SS;
385 return 1; 375 return 1;
386 376
387no_kprobe: 377no_kprobe:
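
For context, a client of this handler is a module that registers a struct kprobe; its pre_handler then runs from kprobe_handler() above with preemption disabled. A sketch against the 2.6-era API - the probe address is hypothetical and would normally be resolved from System.map or kallsyms:

	#include <linux/module.h>
	#include <linux/kprobes.h>

	/* Runs from kprobe_handler() before the original insn is single-stepped. */
	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
	{
		printk(KERN_INFO "kprobe hit: rip=%lx rsp=%lx\n",
		       regs->rip, regs->rsp);
		return 0;	/* 0: fall through to the ss_probe path above */
	}

	static struct kprobe kp = {
		/* hypothetical address of the probed instruction */
		.addr = (kprobe_opcode_t *)0xffffffff80110abcUL,
		.pre_handler = handler_pre,
	};

	static int __init probe_init(void)
	{
		return register_kprobe(&kp);
	}

	static void __exit probe_exit(void)
	{
		unregister_kprobe(&kp);
	}

	module_init(probe_init);
	module_exit(probe_exit);
	MODULE_LICENSE("GPL");
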
@@ -409,9 +399,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
409 struct kretprobe_instance *ri = NULL; 399 struct kretprobe_instance *ri = NULL;
410 struct hlist_head *head; 400 struct hlist_head *head;
411 struct hlist_node *node, *tmp; 401 struct hlist_node *node, *tmp;
412 unsigned long orig_ret_address = 0; 402 unsigned long flags, orig_ret_address = 0;
413 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 403 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
414 404
405 spin_lock_irqsave(&kretprobe_lock, flags);
415 head = kretprobe_inst_table_head(current); 406 head = kretprobe_inst_table_head(current);
416 407
417 /* 408 /*
@@ -450,13 +441,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
450 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); 441 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
451 regs->rip = orig_ret_address; 442 regs->rip = orig_ret_address;
452 443
453 unlock_kprobes(); 444 reset_current_kprobe();
445 spin_unlock_irqrestore(&kretprobe_lock, flags);
454 preempt_enable_no_resched(); 446 preempt_enable_no_resched();
455 447
456 /* 448 /*
457 * By returning a non-zero value, we are telling 449 * By returning a non-zero value, we are telling
458 * kprobe_handler() that we have handled unlocking 450 * kprobe_handler() that we don't want the post_handler
459 * and re-enabling preemption. 451 * to run (and have re-enabled preemption)
460 */ 452 */
461 return 1; 453 return 1;
462} 454}
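
The kretprobe_lock taken above serializes the per-task instance lists that arch_prepare_kretprobe() fills in. For reference, the consumer side of this machinery looks roughly like the following sketch (same-era API, probed address left hypothetical):

	#include <linux/kprobes.h>

	/* Runs from trampoline_probe_handler(); rax holds the return value. */
	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		printk(KERN_INFO "probed function returned %ld\n", (long)regs->rax);
		return 0;
	}

	static struct kretprobe rp = {
		.handler = ret_handler,
		.maxactive = 20,   /* free instances before nmissed++ kicks in */
	};

	/* rp.kp.addr = <address of the probed function>;
	   register_kretprobe(&rp); ... unregister_kretprobe(&rp); */
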
@@ -483,7 +475,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
483 * that is atop the stack is the address following the copied instruction. 475 * that is atop the stack is the address following the copied instruction.
484 * We need to make it the address following the original instruction. 476 * We need to make it the address following the original instruction.
485 */ 477 */
486static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) 478static void __kprobes resume_execution(struct kprobe *p,
479 struct pt_regs *regs, struct kprobe_ctlblk *kcb)
487{ 480{
488 unsigned long *tos = (unsigned long *)regs->rsp; 481 unsigned long *tos = (unsigned long *)regs->rsp;
489 unsigned long next_rip = 0; 482 unsigned long next_rip = 0;
@@ -498,7 +491,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
498 switch (*insn) { 491 switch (*insn) {
499 case 0x9c: /* pushfl */ 492 case 0x9c: /* pushfl */
500 *tos &= ~(TF_MASK | IF_MASK); 493 *tos &= ~(TF_MASK | IF_MASK);
501 *tos |= kprobe_old_rflags; 494 *tos |= kcb->kprobe_old_rflags;
502 break; 495 break;
503 case 0xc3: /* ret/lret */ 496 case 0xc3: /* ret/lret */
504 case 0xcb: 497 case 0xcb:
@@ -537,30 +530,28 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
537 } 530 }
538} 531}
539 532
540/*
541 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
542 * remain disabled throughout this function. And we hold the kprobe lock.
543 */
544int __kprobes post_kprobe_handler(struct pt_regs *regs) 533int __kprobes post_kprobe_handler(struct pt_regs *regs)
545{ 534{
546 if (!kprobe_running()) 535 struct kprobe *cur = kprobe_running();
536 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
537
538 if (!cur)
547 return 0; 539 return 0;
548 540
549 if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { 541 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
550 kprobe_status = KPROBE_HIT_SSDONE; 542 kcb->kprobe_status = KPROBE_HIT_SSDONE;
551 current_kprobe->post_handler(current_kprobe, regs, 0); 543 cur->post_handler(cur, regs, 0);
552 } 544 }
553 545
554 resume_execution(current_kprobe, regs); 546 resume_execution(cur, regs, kcb);
555 regs->eflags |= kprobe_saved_rflags; 547 regs->eflags |= kcb->kprobe_saved_rflags;
556 548
557 /* Restore the original saved kprobes variables and continue. */ 549 /* Restore the original saved kprobes variables and continue. */
558 if (kprobe_status == KPROBE_REENTER) { 550 if (kcb->kprobe_status == KPROBE_REENTER) {
559 restore_previous_kprobe(); 551 restore_previous_kprobe(kcb);
560 goto out; 552 goto out;
561 } else {
562 unlock_kprobes();
563 } 553 }
554 reset_current_kprobe();
564out: 555out:
565 preempt_enable_no_resched(); 556 preempt_enable_no_resched();
566 557
@@ -575,18 +566,19 @@ out:
575 return 1; 566 return 1;
576} 567}
577 568
578/* Interrupts disabled, kprobe_lock held. */
579int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) 569int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
580{ 570{
581 if (current_kprobe->fault_handler 571 struct kprobe *cur = kprobe_running();
582 && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) 572 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
573
574 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
583 return 1; 575 return 1;
584 576
585 if (kprobe_status & KPROBE_HIT_SS) { 577 if (kcb->kprobe_status & KPROBE_HIT_SS) {
586 resume_execution(current_kprobe, regs); 578 resume_execution(cur, regs, kcb);
587 regs->eflags |= kprobe_old_rflags; 579 regs->eflags |= kcb->kprobe_old_rflags;
588 580
589 unlock_kprobes(); 581 reset_current_kprobe();
590 preempt_enable_no_resched(); 582 preempt_enable_no_resched();
591 } 583 }
592 return 0; 584 return 0;
@@ -599,39 +591,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
599 unsigned long val, void *data) 591 unsigned long val, void *data)
600{ 592{
601 struct die_args *args = (struct die_args *)data; 593 struct die_args *args = (struct die_args *)data;
594 int ret = NOTIFY_DONE;
595
602 switch (val) { 596 switch (val) {
603 case DIE_INT3: 597 case DIE_INT3:
604 if (kprobe_handler(args->regs)) 598 if (kprobe_handler(args->regs))
605 return NOTIFY_STOP; 599 ret = NOTIFY_STOP;
606 break; 600 break;
607 case DIE_DEBUG: 601 case DIE_DEBUG:
608 if (post_kprobe_handler(args->regs)) 602 if (post_kprobe_handler(args->regs))
609 return NOTIFY_STOP; 603 ret = NOTIFY_STOP;
610 break; 604 break;
611 case DIE_GPF: 605 case DIE_GPF:
612 if (kprobe_running() &&
613 kprobe_fault_handler(args->regs, args->trapnr))
614 return NOTIFY_STOP;
615 break;
616 case DIE_PAGE_FAULT: 606 case DIE_PAGE_FAULT:
607 /* kprobe_running() needs smp_processor_id() */
608 preempt_disable();
617 if (kprobe_running() && 609 if (kprobe_running() &&
618 kprobe_fault_handler(args->regs, args->trapnr)) 610 kprobe_fault_handler(args->regs, args->trapnr))
619 return NOTIFY_STOP; 611 ret = NOTIFY_STOP;
612 preempt_enable();
620 break; 613 break;
621 default: 614 default:
622 break; 615 break;
623 } 616 }
624 return NOTIFY_DONE; 617 return ret;
625} 618}
626 619
627int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 620int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
628{ 621{
629 struct jprobe *jp = container_of(p, struct jprobe, kp); 622 struct jprobe *jp = container_of(p, struct jprobe, kp);
630 unsigned long addr; 623 unsigned long addr;
624 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
631 625
632 jprobe_saved_regs = *regs; 626 kcb->jprobe_saved_regs = *regs;
633 jprobe_saved_rsp = (long *) regs->rsp; 627 kcb->jprobe_saved_rsp = (long *) regs->rsp;
634 addr = (unsigned long)jprobe_saved_rsp; 628 addr = (unsigned long)(kcb->jprobe_saved_rsp);
635 /* 629 /*
636 * As Linus pointed out, gcc assumes that the callee 630 * As Linus pointed out, gcc assumes that the callee
637 * owns the argument space and could overwrite it, e.g. 631 * owns the argument space and could overwrite it, e.g.
@@ -639,7 +633,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
639 * we also save and restore enough stack bytes to cover 633 * we also save and restore enough stack bytes to cover
640 * the argument area. 634 * the argument area.
641 */ 635 */
642 memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); 636 memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
637 MIN_STACK_SIZE(addr));
643 regs->eflags &= ~IF_MASK; 638 regs->eflags &= ~IF_MASK;
644 regs->rip = (unsigned long)(jp->entry); 639 regs->rip = (unsigned long)(jp->entry);
645 return 1; 640 return 1;
@@ -647,36 +642,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
647 642
648void __kprobes jprobe_return(void) 643void __kprobes jprobe_return(void)
649{ 644{
650 preempt_enable_no_resched(); 645 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
646
651 asm volatile (" xchg %%rbx,%%rsp \n" 647 asm volatile (" xchg %%rbx,%%rsp \n"
652 " int3 \n" 648 " int3 \n"
653 " .globl jprobe_return_end \n" 649 " .globl jprobe_return_end \n"
654 " jprobe_return_end: \n" 650 " jprobe_return_end: \n"
655 " nop \n"::"b" 651 " nop \n"::"b"
656 (jprobe_saved_rsp):"memory"); 652 (kcb->jprobe_saved_rsp):"memory");
657} 653}
658 654
659int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 655int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
660{ 656{
657 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
661 u8 *addr = (u8 *) (regs->rip - 1); 658 u8 *addr = (u8 *) (regs->rip - 1);
662 unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; 659 unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
663 struct jprobe *jp = container_of(p, struct jprobe, kp); 660 struct jprobe *jp = container_of(p, struct jprobe, kp);
664 661
665 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { 662 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
666 if ((long *)regs->rsp != jprobe_saved_rsp) { 663 if ((long *)regs->rsp != kcb->jprobe_saved_rsp) {
667 struct pt_regs *saved_regs = 664 struct pt_regs *saved_regs =
668 container_of(jprobe_saved_rsp, struct pt_regs, rsp); 665 container_of(kcb->jprobe_saved_rsp,
666 struct pt_regs, rsp);
669 printk("current rsp %p does not match saved rsp %p\n", 667 printk("current rsp %p does not match saved rsp %p\n",
670 (long *)regs->rsp, jprobe_saved_rsp); 668 (long *)regs->rsp, kcb->jprobe_saved_rsp);
671 printk("Saved registers for jprobe %p\n", jp); 669 printk("Saved registers for jprobe %p\n", jp);
672 show_registers(saved_regs); 670 show_registers(saved_regs);
673 printk("Current registers\n"); 671 printk("Current registers\n");
674 show_registers(regs); 672 show_registers(regs);
675 BUG(); 673 BUG();
676 } 674 }
677 *regs = jprobe_saved_regs; 675 *regs = kcb->jprobe_saved_regs;
678 memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, 676 memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
679 MIN_STACK_SIZE(stack_addr)); 677 MIN_STACK_SIZE(stack_addr));
678 preempt_enable_no_resched();
680 return 1; 679 return 1;
681 } 680 }
682 return 0; 681 return 0;
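
The jprobe changes above move the saved registers and the copied stack window into the per-CPU control block as well. Usage, sketched for orientation: the entry function mirrors the probed function's signature, inspects the live arguments, and must finish with jprobe_return() so that longjmp_break_handler() can restore the state saved in setjmp_pre_handler() (the names below are hypothetical):

	#include <linux/kprobes.h>

	/* Must match the probed function's signature exactly. */
	static long jentry(unsigned long arg0, unsigned long arg1)
	{
		printk(KERN_INFO "called with %lx, %lx\n", arg0, arg1);
		jprobe_return();   /* int3 back into longjmp_break_handler() */
		return 0;          /* never reached */
	}

	static struct jprobe jp = {
		.entry = (kprobe_opcode_t *)jentry,
		/* jp.kp.addr = <address of the probed function>;
		   register_jprobe(&jp); */
	};
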
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 08203b07f4bd..183dc6105429 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
37static unsigned long console_logged; 37static unsigned long console_logged;
38static int notify_user; 38static int notify_user;
39static int rip_msr; 39static int rip_msr;
40static int mce_bootlog; 40static int mce_bootlog = 1;
41 41
42/* 42/*
43 * Lockless MCE logging infrastructure. 43 * Lockless MCE logging infrastructure.
@@ -54,9 +54,12 @@ void mce_log(struct mce *mce)
54{ 54{
55 unsigned next, entry; 55 unsigned next, entry;
56 mce->finished = 0; 56 mce->finished = 0;
57 smp_wmb(); 57 wmb();
58 for (;;) { 58 for (;;) {
59 entry = rcu_dereference(mcelog.next); 59 entry = rcu_dereference(mcelog.next);
60 /* The rmb forces the compiler to reload next in each
61 iteration */
62 rmb();
60 for (;;) { 63 for (;;) {
61 /* When the buffer fills up discard new entries. Assume 64 /* When the buffer fills up discard new entries. Assume
62 that the earlier errors are the more interesting. */ 65 that the earlier errors are the more interesting. */
@@ -69,6 +72,7 @@ void mce_log(struct mce *mce)
69 entry++; 72 entry++;
70 continue; 73 continue;
71 } 74 }
75 break;
72 } 76 }
73 smp_rmb(); 77 smp_rmb();
74 next = entry + 1; 78 next = entry + 1;
@@ -76,9 +80,9 @@ void mce_log(struct mce *mce)
76 break; 80 break;
77 } 81 }
78 memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); 82 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
79 smp_wmb(); 83 wmb();
80 mcelog.entry[entry].finished = 1; 84 mcelog.entry[entry].finished = 1;
81 smp_wmb(); 85 wmb();
82 86
83 if (!test_and_set_bit(0, &console_logged)) 87 if (!test_and_set_bit(0, &console_logged))
84 notify_user = 1; 88 notify_user = 1;
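
mce_log() above implements a lockless log: a record's payload must be globally visible before its finished flag, and readers must re-check in the opposite order. The protocol, reduced to a sketch:

	/* Sketch of the publish/consume ordering used by mce_log() above. */
	struct record {
		int payload;
		int finished;
	};

	static void publish(struct record *r, int val)
	{
		r->payload = val;
		wmb();		/* payload visible before the finished flag */
		r->finished = 1;
	}

	static int consume(struct record *r, int *out)
	{
		if (!r->finished)
			return 0;	/* not published yet */
		rmb();			/* read finished before the payload */
		*out = r->payload;
		return 1;
	}
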
@@ -343,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
343 /* disable GART TBL walk error reporting, which trips off 347 /* disable GART TBL walk error reporting, which trips off
344 incorrectly with the IOMMU & 3ware & Cerberus. */ 348 incorrectly with the IOMMU & 3ware & Cerberus. */
345 clear_bit(10, &bank[4]); 349 clear_bit(10, &bank[4]);
350 /* Lots of broken BIOSes around that don't clear them
351 by default and leave crap in there. Don't log. */
352 mce_bootlog = 0;
346 } 353 }
354
347} 355}
348 356
349static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 357static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
@@ -352,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
352 case X86_VENDOR_INTEL: 360 case X86_VENDOR_INTEL:
353 mce_intel_feature_init(c); 361 mce_intel_feature_init(c);
354 break; 362 break;
363 case X86_VENDOR_AMD:
364 mce_amd_feature_init(c);
365 break;
355 default: 366 default:
356 break; 367 break;
357 } 368 }
@@ -491,16 +502,16 @@ static int __init mcheck_disable(char *str)
491/* mce=off disables machine check. Note you can reenable it later 502/* mce=off disables machine check. Note you can reenable it later
492 using sysfs. 503 using sysfs.
493 mce=TOLERANCELEVEL (number, see above) 504 mce=TOLERANCELEVEL (number, see above)
494 mce=bootlog Log MCEs from before booting. Disabled by default to work 505 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
495 around buggy BIOS that leave bogus MCEs. */ 506 mce=nobootlog Don't log MCEs from before booting. */
496static int __init mcheck_enable(char *str) 507static int __init mcheck_enable(char *str)
497{ 508{
498 if (*str == '=') 509 if (*str == '=')
499 str++; 510 str++;
500 if (!strcmp(str, "off")) 511 if (!strcmp(str, "off"))
501 mce_dont_init = 1; 512 mce_dont_init = 1;
502 else if (!strcmp(str, "bootlog")) 513 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
503 mce_bootlog = 1; 514 mce_bootlog = str[0] == 'b';
504 else if (isdigit(str[0])) 515 else if (isdigit(str[0]))
505 get_option(&str, &tolerant); 516 get_option(&str, &tolerant);
506 else 517 else
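
The two-string trick above keys everything off the first character, so the boot parameter ends up behaving as follows (sketch):

	/* Settings produced by the mce= boot parameter:
	 *
	 *   mce=off        -> mce_dont_init = 1   (machine checks disabled)
	 *   mce=3          -> tolerant = 3        (any number: tolerance level)
	 *   mce=bootlog    -> mce_bootlog = 1     (str[0] == 'b')
	 *   mce=nobootlog  -> mce_bootlog = 0     (str[0] == 'n')
	 */
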
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
new file mode 100644
index 000000000000..1f76175ace02
--- /dev/null
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -0,0 +1,538 @@
1/*
2 * (c) 2005 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 *
7 * Written by Jacob Shin - AMD, Inc.
8 *
9 * Support : jacob.shin@amd.com
10 *
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
12 * MC4_MISC0 exists per physical processor.
13 *
14 */
15
16#include <linux/cpu.h>
17#include <linux/errno.h>
18#include <linux/init.h>
19#include <linux/interrupt.h>
20#include <linux/kobject.h>
21#include <linux/notifier.h>
22#include <linux/sched.h>
23#include <linux/smp.h>
24#include <linux/sysdev.h>
25#include <linux/sysfs.h>
26#include <asm/apic.h>
27#include <asm/mce.h>
28#include <asm/msr.h>
29#include <asm/percpu.h>
30
31#define PFX "mce_threshold: "
32#define VERSION "version 1.00.9"
33#define NR_BANKS 5
34#define THRESHOLD_MAX 0xFFF
35#define INT_TYPE_APIC 0x00020000
36#define MASK_VALID_HI 0x80000000
37#define MASK_LVTOFF_HI 0x00F00000
38#define MASK_COUNT_EN_HI 0x00080000
39#define MASK_INT_TYPE_HI 0x00060000
40#define MASK_OVERFLOW_HI 0x00010000
41#define MASK_ERR_COUNT_HI 0x00000FFF
42#define MASK_OVERFLOW 0x0001000000000000L
43
44struct threshold_bank {
45 unsigned int cpu;
46 u8 bank;
47 u8 interrupt_enable;
48 u16 threshold_limit;
49 struct kobject kobj;
50};
51
52static struct threshold_bank threshold_defaults = {
53 .interrupt_enable = 0,
54 .threshold_limit = THRESHOLD_MAX,
55};
56
57#ifdef CONFIG_SMP
58static unsigned char shared_bank[NR_BANKS] = {
59 0, 0, 0, 0, 1
60};
61#endif
62
63static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
64
65/*
66 * CPU Initialization
67 */
68
69/* must be called with correct cpu affinity */
70static void threshold_restart_bank(struct threshold_bank *b,
71 int reset, u16 old_limit)
72{
73 u32 mci_misc_hi, mci_misc_lo;
74
75 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
76
77 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
78 reset = 1; /* limit cannot be lower than err count */
79
80 if (reset) { /* reset err count and overflow bit */
81 mci_misc_hi =
82 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
83 (THRESHOLD_MAX - b->threshold_limit);
84 } else if (old_limit) { /* change limit w/o reset */
85 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
86 (old_limit - b->threshold_limit);
87 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
88 (new_count & THRESHOLD_MAX);
89 }
90
91 b->interrupt_enable ?
92 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
93 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
94
95 mci_misc_hi |= MASK_COUNT_EN_HI;
96 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
97}
98
99void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
100{
101 int bank;
102 u32 mci_misc_lo, mci_misc_hi;
103 unsigned int cpu = smp_processor_id();
104
105 for (bank = 0; bank < NR_BANKS; ++bank) {
106 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
107
108 /* !valid, !counter present, bios locked */
109 if (!(mci_misc_hi & MASK_VALID_HI) ||
110 !(mci_misc_hi & MASK_VALID_HI >> 1) ||
111 (mci_misc_hi & MASK_VALID_HI >> 2))
112 continue;
113
114 per_cpu(bank_map, cpu) |= (1 << bank);
115
116#ifdef CONFIG_SMP
117 if (shared_bank[bank] && cpu_core_id[cpu])
118 continue;
119#endif
120
121 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
122 threshold_defaults.cpu = cpu;
123 threshold_defaults.bank = bank;
124 threshold_restart_bank(&threshold_defaults, 0, 0);
125 }
126}
127
128/*
129 * APIC Interrupt Handler
130 */
131
132/*
133 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
134 * the interrupt goes off when error_count reaches threshold_limit.
135 * the handler will simply log mcelog w/ software defined bank number.
136 */
137asmlinkage void mce_threshold_interrupt(void)
138{
139 int bank;
140 struct mce m;
141
142 ack_APIC_irq();
143 irq_enter();
144
145 memset(&m, 0, sizeof(m));
146 rdtscll(m.tsc);
147 m.cpu = smp_processor_id();
148
149 /* assume first bank caused it */
150 for (bank = 0; bank < NR_BANKS; ++bank) {
151 m.bank = MCE_THRESHOLD_BASE + bank;
152 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
153
154 if (m.misc & MASK_OVERFLOW) {
155 mce_log(&m);
156 goto out;
157 }
158 }
159 out:
160 irq_exit();
161}
162
163/*
164 * Sysfs Interface
165 */
166
167static struct sysdev_class threshold_sysclass = {
168 set_kset_name("threshold"),
169};
170
171static DEFINE_PER_CPU(struct sys_device, device_threshold);
172
173struct threshold_attr {
174 struct attribute attr;
175 ssize_t(*show) (struct threshold_bank *, char *);
176 ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
177};
178
179static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
180
181static cpumask_t affinity_set(unsigned int cpu)
182{
183 cpumask_t oldmask = current->cpus_allowed;
184 cpumask_t newmask = CPU_MASK_NONE;
185 cpu_set(cpu, newmask);
186 set_cpus_allowed(current, newmask);
187 return oldmask;
188}
189
190static void affinity_restore(cpumask_t oldmask)
191{
192 set_cpus_allowed(current, oldmask);
193}
194
195#define SHOW_FIELDS(name) \
196 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
197 { \
198 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
199 }
200SHOW_FIELDS(interrupt_enable)
201SHOW_FIELDS(threshold_limit)
202
203static ssize_t store_interrupt_enable(struct threshold_bank *b,
204 const char *buf, size_t count)
205{
206 char *end;
207 cpumask_t oldmask;
208 unsigned long new = simple_strtoul(buf, &end, 0);
209 if (end == buf)
210 return -EINVAL;
211 b->interrupt_enable = !!new;
212
213 oldmask = affinity_set(b->cpu);
214 threshold_restart_bank(b, 0, 0);
215 affinity_restore(oldmask);
216
217 return end - buf;
218}
219
220static ssize_t store_threshold_limit(struct threshold_bank *b,
221 const char *buf, size_t count)
222{
223 char *end;
224 cpumask_t oldmask;
225 u16 old;
226 unsigned long new = simple_strtoul(buf, &end, 0);
227 if (end == buf)
228 return -EINVAL;
229 if (new > THRESHOLD_MAX)
230 new = THRESHOLD_MAX;
231 if (new < 1)
232 new = 1;
233 old = b->threshold_limit;
234 b->threshold_limit = new;
235
236 oldmask = affinity_set(b->cpu);
237 threshold_restart_bank(b, 0, old);
238 affinity_restore(oldmask);
239
240 return end - buf;
241}
242
243static ssize_t show_error_count(struct threshold_bank *b, char *buf)
244{
245 u32 high, low;
246 cpumask_t oldmask;
247 oldmask = affinity_set(b->cpu);
248 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
249 affinity_restore(oldmask);
250 return sprintf(buf, "%x\n",
251 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
252}
253
254static ssize_t store_error_count(struct threshold_bank *b,
255 const char *buf, size_t count)
256{
257 cpumask_t oldmask;
258 oldmask = affinity_set(b->cpu);
259 threshold_restart_bank(b, 1, 0);
260 affinity_restore(oldmask);
261 return 1;
262}
263
264#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
265 .attr = {.name = __stringify(_name), .mode = _mode }, \
266 .show = _show, \
267 .store = _store, \
268};
269
270#define ATTR_FIELDS(name) \
271 static struct threshold_attr name = \
272 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
273
274ATTR_FIELDS(interrupt_enable);
275ATTR_FIELDS(threshold_limit);
276ATTR_FIELDS(error_count);
277
278static struct attribute *default_attrs[] = {
279 &interrupt_enable.attr,
280 &threshold_limit.attr,
281 &error_count.attr,
282 NULL
283};
284
285#define to_bank(k) container_of(k,struct threshold_bank,kobj)
286#define to_attr(a) container_of(a,struct threshold_attr,attr)
287
288static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
289{
290 struct threshold_bank *b = to_bank(kobj);
291 struct threshold_attr *a = to_attr(attr);
292 ssize_t ret;
293 ret = a->show ? a->show(b, buf) : -EIO;
294 return ret;
295}
296
297static ssize_t store(struct kobject *kobj, struct attribute *attr,
298 const char *buf, size_t count)
299{
300 struct threshold_bank *b = to_bank(kobj);
301 struct threshold_attr *a = to_attr(attr);
302 ssize_t ret;
303 ret = a->store ? a->store(b, buf, count) : -EIO;
304 return ret;
305}
306
307static struct sysfs_ops threshold_ops = {
308 .show = show,
309 .store = store,
310};
311
312static struct kobj_type threshold_ktype = {
313 .sysfs_ops = &threshold_ops,
314 .default_attrs = default_attrs,
315};
316
317/* symlinks sibling shared banks to first core. first core owns dir/files. */
318static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
319{
320 int err = 0;
321 struct threshold_bank *b = 0;
322
323#ifdef CONFIG_SMP
324 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */
325 char name[16];
326 unsigned lcpu = first_cpu(cpu_core_map[cpu]);
327 if (cpu_core_id[lcpu])
328 goto out; /* first core not up yet */
329
330 b = per_cpu(threshold_banks, lcpu)[bank];
331 if (!b)
332 goto out;
333 sprintf(name, "bank%i", bank);
334 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
335 &b->kobj, name);
336 if (err)
337 goto out;
338 per_cpu(threshold_banks, cpu)[bank] = b;
339 goto out;
340 }
341#endif
342
343 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
344 if (!b) {
345 err = -ENOMEM;
346 goto out;
347 }
348 memset(b, 0, sizeof(struct threshold_bank));
349
350 b->cpu = cpu;
351 b->bank = bank;
352 b->interrupt_enable = 0;
353 b->threshold_limit = THRESHOLD_MAX;
354 kobject_set_name(&b->kobj, "bank%i", bank);
355 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
356 b->kobj.ktype = &threshold_ktype;
357
358 err = kobject_register(&b->kobj);
359 if (err) {
360 kfree(b);
361 goto out;
362 }
363 per_cpu(threshold_banks, cpu)[bank] = b;
364 out:
365 return err;
366}
367
368/* create dir/files for all valid threshold banks */
369static __cpuinit int threshold_create_device(unsigned int cpu)
370{
371 int bank;
372 int err = 0;
373
374 per_cpu(device_threshold, cpu).id = cpu;
375 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
376 err = sysdev_register(&per_cpu(device_threshold, cpu));
377 if (err)
378 goto out;
379
380 for (bank = 0; bank < NR_BANKS; ++bank) {
381 if (!(per_cpu(bank_map, cpu) & 1 << bank))
382 continue;
383 err = threshold_create_bank(cpu, bank);
384 if (err)
385 goto out;
386 }
387 out:
388 return err;
389}
390
391#ifdef CONFIG_HOTPLUG_CPU
392/*
393 * let's be hotplug friendly.
394 * in case of multiple core processors, the first core always takes ownership
395 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
396 */
397
398/* cpu hotplug call removes all symlinks before first core dies */
399static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
400{
401 struct threshold_bank *b;
402 char name[16];
403
404 b = per_cpu(threshold_banks, cpu)[bank];
405 if (!b)
406 return;
407 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
408 sprintf(name, "bank%i", bank);
409 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
410 per_cpu(threshold_banks, cpu)[bank] = 0;
411 } else {
412 kobject_unregister(&b->kobj);
413 kfree(per_cpu(threshold_banks, cpu)[bank]);
414 }
415}
416
417static __cpuinit void threshold_remove_device(unsigned int cpu)
418{
419 int bank;
420
421 for (bank = 0; bank < NR_BANKS; ++bank) {
422 if (!(per_cpu(bank_map, cpu) & 1 << bank))
423 continue;
424 threshold_remove_bank(cpu, bank);
425 }
426 sysdev_unregister(&per_cpu(device_threshold, cpu));
427}
428
429/* link all existing siblings when first core comes up */
430static __cpuinit int threshold_create_symlinks(unsigned int cpu)
431{
432 int bank, err = 0;
433 unsigned int lcpu = 0;
434
435 if (cpu_core_id[cpu])
436 return 0;
437 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
438 if (lcpu == cpu)
439 continue;
440 for (bank = 0; bank < NR_BANKS; ++bank) {
441 if (!(per_cpu(bank_map, cpu) & 1 << bank))
442 continue;
443 if (!shared_bank[bank])
444 continue;
445 err = threshold_create_bank(lcpu, bank);
446 }
447 }
448 return err;
449}
450
451/* remove all symlinks before first core dies. */
452static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
453{
454 int bank;
455 unsigned int lcpu = 0;
456 if (cpu_core_id[cpu])
457 return;
458 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
459 if (lcpu == cpu)
460 continue;
461 for (bank = 0; bank < NR_BANKS; ++bank) {
462 if (!(per_cpu(bank_map, cpu) & 1 << bank))
463 continue;
464 if (!shared_bank[bank])
465 continue;
466 threshold_remove_bank(lcpu, bank);
467 }
468 }
469}
470#else /* !CONFIG_HOTPLUG_CPU */
471static __cpuinit void threshold_create_symlinks(unsigned int cpu)
472{
473}
474static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
475{
476}
477static void threshold_remove_device(unsigned int cpu)
478{
479}
480#endif
481
482/* get notified when a cpu comes on/off */
483static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
484 unsigned long action, void *hcpu)
485{
486 /* cpu was unsigned int to begin with */
487 unsigned int cpu = (unsigned long)hcpu;
488
489 if (cpu >= NR_CPUS)
490 goto out;
491
492 switch (action) {
493 case CPU_ONLINE:
494 threshold_create_device(cpu);
495 threshold_create_symlinks(cpu);
496 break;
497 case CPU_DOWN_PREPARE:
498 threshold_remove_symlinks(cpu);
499 break;
500 case CPU_DOWN_FAILED:
501 threshold_create_symlinks(cpu);
502 break;
503 case CPU_DEAD:
504 threshold_remove_device(cpu);
505 break;
506 default:
507 break;
508 }
509 out:
510 return NOTIFY_OK;
511}
512
513static struct notifier_block threshold_cpu_notifier = {
514 .notifier_call = threshold_cpu_callback,
515};
516
517static __init int threshold_init_device(void)
518{
519 int err;
520 int lcpu = 0;
521
522 err = sysdev_class_register(&threshold_sysclass);
523 if (err)
524 goto out;
525
526 /* to hit CPUs online before the notifier is up */
527 for_each_online_cpu(lcpu) {
528 err = threshold_create_device(lcpu);
529 if (err)
530 goto out;
531 }
532 register_cpu_notifier(&threshold_cpu_notifier);
533
534 out:
535 return err;
536}
537
538device_initcall(threshold_init_device);
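
Everything in mce_amd.c that touches a bank MSR leans on the affinity_set()/affinity_restore() pair: the threshold registers are per-CPU, so the calling thread is briefly pinned to the owning CPU. The pattern in isolation, as a sketch against the 2.6-era cpumask API:

	/* Sketch: read a per-CPU bank MSR by pinning current to that CPU. */
	static u32 read_bank_misc_hi(unsigned int cpu, int bank)
	{
		cpumask_t oldmask = current->cpus_allowed;
		cpumask_t newmask = CPU_MASK_NONE;
		u32 lo, hi;

		cpu_set(cpu, newmask);
		set_cpus_allowed(current, newmask);	/* migrate to target CPU */
		rdmsr(MSR_IA32_MC0_MISC + bank * 4, lo, hi);
		set_cpus_allowed(current, oldmask);	/* and migrate back */
		return hi;
	}
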
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index f16d38d09daf..1105250bf02c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
42 * Various Linux-internal data structures created from the 42 * Various Linux-internal data structures created from the
43 * MP-table. 43 * MP-table.
44 */ 44 */
45int apic_version [MAX_APICS]; 45unsigned char apic_version [MAX_APICS];
46unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 46unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
47int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 47int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
48 48
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
65/* Processor that is doing the boot up */ 65/* Processor that is doing the boot up */
66unsigned int boot_cpu_id = -1U; 66unsigned int boot_cpu_id = -1U;
67/* Internal processor count */ 67/* Internal processor count */
68static unsigned int num_processors = 0; 68unsigned int num_processors __initdata = 0;
69
70unsigned disabled_cpus __initdata;
69 71
70/* Bitmask of physically existing CPUs */ 72/* Bitmask of physically existing CPUs */
71physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; 73physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len)
106 108
107static void __init MP_processor_info (struct mpc_config_processor *m) 109static void __init MP_processor_info (struct mpc_config_processor *m)
108{ 110{
109 int ver, cpu; 111 int cpu;
112 unsigned char ver;
110 static int found_bsp=0; 113 static int found_bsp=0;
111 114
112 if (!(m->mpc_cpuflag & CPU_ENABLED)) 115 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
116 disabled_cpus++;
113 return; 117 return;
118 }
114 119
115 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", 120 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
116 m->mpc_apicid, 121 m->mpc_apicid,
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
129 } 134 }
130 135
131 cpu = num_processors++; 136 cpu = num_processors++;
132 137
133 if (m->mpc_apicid > MAX_APICS) { 138#if MAX_APICS < 255
139 if ((int)m->mpc_apicid > MAX_APICS) {
134 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 140 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
135 m->mpc_apicid, MAX_APICS); 141 m->mpc_apicid, MAX_APICS);
136 return; 142 return;
137 } 143 }
144#endif
138 ver = m->mpc_apicver; 145 ver = m->mpc_apicver;
139 146
140 physid_set(m->mpc_apicid, phys_cpu_present_map); 147 physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
218 m->mpc_irqtype, m->mpc_irqflag & 3, 225 m->mpc_irqtype, m->mpc_irqflag & 3,
219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 226 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 227 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
221 if (++mp_irq_entries == MAX_IRQ_SOURCES) 228 if (++mp_irq_entries >= MAX_IRQ_SOURCES)
222 panic("Max # of irq sources exceeded!!\n"); 229 panic("Max # of irq sources exceeded!!\n");
223} 230}
224 231
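
Changing '==' to '>=' above makes the overflow guard fail closed: an exact-equality test is stepped over for good if the counter ever passes the limit. The shape of the fix, with a hypothetical table for illustration:

	static struct mpc_config_intsrc table[MAX_IRQ_SOURCES];	/* hypothetical */
	static int nentries;

	static void table_add(struct mpc_config_intsrc *m)
	{
		table[nentries] = *m;
		if (++nentries >= MAX_IRQ_SOURCES)	/* '==' could be skipped past */
			panic("Max # of irq sources exceeded!!\n");
	}
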
@@ -549,7 +556,7 @@ void __init get_smp_config (void)
549 * Read the physical hardware table. Anything here will 556 * Read the physical hardware table. Anything here will
550 * override the defaults. 557 * override the defaults.
551 */ 558 */
552 if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { 559 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
553 smp_found_config = 0; 560 smp_found_config = 0;
554 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); 561 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
555 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); 562 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index cf0a0315d586..2e28e855ec3c 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -187,7 +187,7 @@ static void flush_gart(struct device *dev)
187 187
188/* Allocate DMA memory on node near device */ 188/* Allocate DMA memory on node near device */
189noinline 189noinline
190static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) 190static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
191{ 191{
192 struct page *page; 192 struct page *page;
193 int node; 193 int node;
@@ -204,7 +204,7 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
204 */ 204 */
205void * 205void *
206dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 206dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
207 unsigned gfp) 207 gfp_t gfp)
208{ 208{
209 void *memory; 209 void *memory;
210 unsigned long dma_mask = 0; 210 unsigned long dma_mask = 0;
@@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
220 uses the normal dma_mask for alloc_coherent. */ 220 uses the normal dma_mask for alloc_coherent. */
221 dma_mask &= *dev->dma_mask; 221 dma_mask &= *dev->dma_mask;
222 222
223 /* Why <=? Even when the mask is smaller than 4GB it is often larger
224 than 16MB and in this case we have a chance of finding fitting memory
225 in the next higher zone first. If not retry with true GFP_DMA. -AK */
226 if (dma_mask <= 0xffffffff)
227 gfp |= GFP_DMA32;
228
223 again: 229 again:
224 memory = dma_alloc_pages(dev, gfp, get_order(size)); 230 memory = dma_alloc_pages(dev, gfp, get_order(size));
225 if (memory == NULL) 231 if (memory == NULL)
@@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
245 } 251 }
246 252
247 if (!(gfp & GFP_DMA)) { 253 if (!(gfp & GFP_DMA)) {
248 gfp |= GFP_DMA; 254 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
249 goto again; 255 goto again;
250 } 256 }
251 return NULL; 257 return NULL;
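
The GFP_DMA32 logic added above is a two-step zone fallback: for a mask at or below 4GB, try ZONE_DMA32 first (far larger than the 16MB ZONE_DMA), and only fall back to true GFP_DMA if the pages still land above the mask. A self-contained sketch of the policy, not the real allocator:

	/* Sketch: allocate pages whose physical addresses fit under 'mask'. */
	static void *alloc_below(size_t size, u64 mask)
	{
		gfp_t gfp = GFP_KERNEL;
		void *p;

		if (mask <= 0xffffffffULL)
			gfp |= GFP_DMA32;		/* first try: below 4GB */
	again:
		p = (void *)__get_free_pages(gfp, get_order(size));
		if (!p)
			return NULL;
		if (virt_to_phys(p) + size - 1 > mask) {
			free_pages((unsigned long)p, get_order(size));
			if (!(gfp & GFP_DMA)) {
				gfp = (gfp & ~GFP_DMA32) | GFP_DMA; /* below 16MB */
				goto again;
			}
			return NULL;
		}
		return p;
	}
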
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 67d90b89af0b..5a981dca87ff 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -24,7 +24,7 @@ EXPORT_SYMBOL(iommu_sac_force);
24 */ 24 */
25 25
26void *dma_alloc_coherent(struct device *hwdev, size_t size, 26void *dma_alloc_coherent(struct device *hwdev, size_t size,
27 dma_addr_t *dma_handle, unsigned gfp) 27 dma_addr_t *dma_handle, gfp_t gfp)
28{ 28{
29 void *ret; 29 void *ret;
30 u64 mask; 30 u64 mask;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index b5a89c0bdf59..7519fc520eb3 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -86,12 +86,22 @@ EXPORT_SYMBOL(enable_hlt);
86 */ 86 */
87void default_idle(void) 87void default_idle(void)
88{ 88{
89 local_irq_enable();
90
89 if (!atomic_read(&hlt_counter)) { 91 if (!atomic_read(&hlt_counter)) {
90 local_irq_disable(); 92 clear_thread_flag(TIF_POLLING_NRFLAG);
91 if (!need_resched()) 93 smp_mb__after_clear_bit();
92 safe_halt(); 94 while (!need_resched()) {
93 else 95 local_irq_disable();
94 local_irq_enable(); 96 if (!need_resched())
97 safe_halt();
98 else
99 local_irq_enable();
100 }
101 set_thread_flag(TIF_POLLING_NRFLAG);
102 } else {
103 while (!need_resched())
104 cpu_relax();
95 } 105 }
96} 106}
97 107
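
The default_idle() rewrite above is one half of the TIF_POLLING_NRFLAG contract: the flag must be cleared, with a barrier, before the CPU commits to hlt, because a remote CPU that still sees it set will skip the wakeup IPI and merely set TIF_NEED_RESCHED. The waking side of the contract is roughly this sketch:

	/* Sketch of a resched_task()-style wakeup aimed at an idle CPU. */
	static void kick_idle_cpu(struct task_struct *idle, int cpu)
	{
		set_tsk_thread_flag(idle, TIF_NEED_RESCHED);
		smp_mb();	/* NEED_RESCHED visible before the polling test */
		if (!test_tsk_thread_flag(idle, TIF_POLLING_NRFLAG))
			smp_send_reschedule(cpu);   /* target halted: IPI it */
	}
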
@@ -102,30 +112,16 @@ void default_idle(void)
102 */ 112 */
103static void poll_idle (void) 113static void poll_idle (void)
104{ 114{
105 int oldval;
106
107 local_irq_enable(); 115 local_irq_enable();
108 116
109 /* 117 asm volatile(
110 * Deal with another CPU just having chosen a thread to 118 "2:"
111 * run here: 119 "testl %0,%1;"
112 */ 120 "rep; nop;"
113 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); 121 "je 2b;"
114 122 : :
115 if (!oldval) { 123 "i" (_TIF_NEED_RESCHED),
116 set_thread_flag(TIF_POLLING_NRFLAG); 124 "m" (current_thread_info()->flags));
117 asm volatile(
118 "2:"
119 "testl %0,%1;"
120 "rep; nop;"
121 "je 2b;"
122 : :
123 "i" (_TIF_NEED_RESCHED),
124 "m" (current_thread_info()->flags));
125 clear_thread_flag(TIF_POLLING_NRFLAG);
126 } else {
127 set_need_resched();
128 }
129} 125}
130 126
131void cpu_idle_wait(void) 127void cpu_idle_wait(void)
@@ -148,7 +144,8 @@ void cpu_idle_wait(void)
148 do { 144 do {
149 ssleep(1); 145 ssleep(1);
150 for_each_online_cpu(cpu) { 146 for_each_online_cpu(cpu) {
151 if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) 147 if (cpu_isset(cpu, map) &&
148 !per_cpu(cpu_idle_state, cpu))
152 cpu_clear(cpu, map); 149 cpu_clear(cpu, map);
153 } 150 }
154 cpus_and(map, map, cpu_online_map); 151 cpus_and(map, map, cpu_online_map);
@@ -187,6 +184,8 @@ static inline void play_dead(void)
187 */ 184 */
188void cpu_idle (void) 185void cpu_idle (void)
189{ 186{
187 set_thread_flag(TIF_POLLING_NRFLAG);
188
190 /* endless idle loop with no priority at all */ 189 /* endless idle loop with no priority at all */
191 while (1) { 190 while (1) {
192 while (!need_resched()) { 191 while (!need_resched()) {
@@ -204,7 +203,9 @@ void cpu_idle (void)
204 idle(); 203 idle();
205 } 204 }
206 205
206 preempt_enable_no_resched();
207 schedule(); 207 schedule();
208 preempt_disable();
208 } 209 }
209} 210}
210 211
@@ -219,15 +220,12 @@ static void mwait_idle(void)
219{ 220{
220 local_irq_enable(); 221 local_irq_enable();
221 222
222 if (!need_resched()) { 223 while (!need_resched()) {
223 set_thread_flag(TIF_POLLING_NRFLAG); 224 __monitor((void *)&current_thread_info()->flags, 0, 0);
224 do { 225 smp_mb();
225 __monitor((void *)&current_thread_info()->flags, 0, 0); 226 if (need_resched())
226 if (need_resched()) 227 break;
227 break; 228 __mwait(0, 0);
228 __mwait(0, 0);
229 } while (!need_resched());
230 clear_thread_flag(TIF_POLLING_NRFLAG);
231 } 229 }
232} 230}
233 231
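
The mwait_idle() loop above has the same lost-wakeup hazard in a different guise: need_resched() must be re-checked after __monitor() arms the address, or a flag write landing between the check and the mwait would never wake the CPU. Annotated sketch:

	while (!need_resched()) {
		/* Arm the monitor on this thread's flags word. */
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (need_resched())
			break;	/* a wakeup raced in; don't go to sleep */
		/* Sleeps until the monitored line is written, i.e. until
		   someone sets TIF_NEED_RESCHED - no IPI required. */
		__mwait(0, 0);
	}
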
@@ -278,7 +276,8 @@ void __show_regs(struct pt_regs * regs)
278 system_utsname.version); 276 system_utsname.version);
279 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 277 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
280 printk_address(regs->rip); 278 printk_address(regs->rip);
281 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); 279 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
280 regs->eflags);
282 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 281 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
283 regs->rax, regs->rbx, regs->rcx); 282 regs->rax, regs->rbx, regs->rcx);
284 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", 283 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -352,13 +351,6 @@ void flush_thread(void)
352 struct task_struct *tsk = current; 351 struct task_struct *tsk = current;
353 struct thread_info *t = current_thread_info(); 352 struct thread_info *t = current_thread_info();
354 353
355 /*
356 * Remove function-return probe instances associated with this task
357 * and put them back on the free list. Do not insert an exit probe for
358 * this function, it will be disabled by kprobe_flush_task if you do.
359 */
360 kprobe_flush_task(tsk);
361
362 if (t->flags & _TIF_ABI_PENDING) 354 if (t->flags & _TIF_ABI_PENDING)
363 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); 355 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
364 356
@@ -430,15 +422,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
430 struct pt_regs * childregs; 422 struct pt_regs * childregs;
431 struct task_struct *me = current; 423 struct task_struct *me = current;
432 424
433 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; 425 childregs = ((struct pt_regs *)
434 426 (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
435 *childregs = *regs; 427 *childregs = *regs;
436 428
437 childregs->rax = 0; 429 childregs->rax = 0;
438 childregs->rsp = rsp; 430 childregs->rsp = rsp;
439 if (rsp == ~0UL) { 431 if (rsp == ~0UL)
440 childregs->rsp = (unsigned long)childregs; 432 childregs->rsp = (unsigned long)childregs;
441 }
442 433
443 p->thread.rsp = (unsigned long) childregs; 434 p->thread.rsp = (unsigned long) childregs;
444 p->thread.rsp0 = (unsigned long) (childregs+1); 435 p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -460,7 +451,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
460 p->thread.io_bitmap_max = 0; 451 p->thread.io_bitmap_max = 0;
461 return -ENOMEM; 452 return -ENOMEM;
462 } 453 }
463 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); 454 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
455 IO_BITMAP_BYTES);
464 } 456 }
465 457
466 /* 458 /*
@@ -497,7 +489,8 @@ out:
497 * - fold all the options into a flag word and test it with a single test. 489 * - fold all the options into a flag word and test it with a single test.
498 * - could test fs/gs bitsliced 490 * - could test fs/gs bitsliced
499 */ 491 */
500struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 492struct task_struct *
493__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
501{ 494{
502 struct thread_struct *prev = &prev_p->thread, 495 struct thread_struct *prev = &prev_p->thread,
503 *next = &next_p->thread; 496 *next = &next_p->thread;
@@ -568,7 +561,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
568 prev->userrsp = read_pda(oldrsp); 561 prev->userrsp = read_pda(oldrsp);
569 write_pda(oldrsp, next->userrsp); 562 write_pda(oldrsp, next->userrsp);
570 write_pda(pcurrent, next_p); 563 write_pda(pcurrent, next_p);
571 write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); 564 write_pda(kernelstack,
565 (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
572 566
573 /* 567 /*
574 * Now maybe reload the debug registers 568 * Now maybe reload the debug registers
@@ -649,7 +643,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
649 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); 643 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
650} 644}
651 645
652asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 646asmlinkage long
647sys_clone(unsigned long clone_flags, unsigned long newsp,
648 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
653{ 649{
654 if (!newsp) 650 if (!newsp)
655 newsp = regs->rsp; 651 newsp = regs->rsp;
@@ -685,7 +681,8 @@ unsigned long get_wchan(struct task_struct *p)
685 return 0; 681 return 0;
686 fp = *(u64 *)(p->thread.rsp); 682 fp = *(u64 *)(p->thread.rsp);
687 do { 683 do {
688 if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) 684 if (fp < (unsigned long)stack ||
685 fp > (unsigned long)stack+THREAD_SIZE)
689 return 0; 686 return 0;
690 rip = *(u64 *)(fp+8); 687 rip = *(u64 *)(fp+8);
691 if (!in_sched_functions(rip)) 688 if (!in_sched_functions(rip))
@@ -720,8 +717,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
720 task->thread.gsindex = 0; 717 task->thread.gsindex = 0;
721 task->thread.gs = addr; 718 task->thread.gs = addr;
722 if (doit) { 719 if (doit) {
723 load_gs_index(0); 720 load_gs_index(0);
724 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 721 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
725 } 722 }
726 } 723 }
727 put_cpu(); 724 put_cpu();
@@ -738,7 +735,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
738 set_32bit_tls(task, FS_TLS, addr); 735 set_32bit_tls(task, FS_TLS, addr);
739 if (doit) { 736 if (doit) {
740 load_TLS(&task->thread, cpu); 737 load_TLS(&task->thread, cpu);
741 asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); 738 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
742 } 739 }
743 task->thread.fsindex = FS_TLS_SEL; 740 task->thread.fsindex = FS_TLS_SEL;
744 task->thread.fs = 0; 741 task->thread.fs = 0;
@@ -748,8 +745,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
748 if (doit) { 745 if (doit) {
749 /* set the selector to 0 to not confuse 746 /* set the selector to 0 to not confuse
750 __switch_to */ 747 __switch_to */
751 asm volatile("movl %0,%%fs" :: "r" (0)); 748 asm volatile("movl %0,%%fs" :: "r" (0));
752 ret = checking_wrmsrl(MSR_FS_BASE, addr); 749 ret = checking_wrmsrl(MSR_FS_BASE, addr);
753 } 750 }
754 } 751 }
755 put_cpu(); 752 put_cpu();
@@ -758,9 +755,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
758 unsigned long base; 755 unsigned long base;
759 if (task->thread.fsindex == FS_TLS_SEL) 756 if (task->thread.fsindex == FS_TLS_SEL)
760 base = read_32bit_tls(task, FS_TLS); 757 base = read_32bit_tls(task, FS_TLS);
761 else if (doit) { 758 else if (doit)
762 rdmsrl(MSR_FS_BASE, base); 759 rdmsrl(MSR_FS_BASE, base);
763 } else 760 else
764 base = task->thread.fs; 761 base = task->thread.fs;
765 ret = put_user(base, (unsigned long __user *)addr); 762 ret = put_user(base, (unsigned long __user *)addr);
766 break; 763 break;
@@ -769,9 +766,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
769 unsigned long base; 766 unsigned long base;
770 if (task->thread.gsindex == GS_TLS_SEL) 767 if (task->thread.gsindex == GS_TLS_SEL)
771 base = read_32bit_tls(task, GS_TLS); 768 base = read_32bit_tls(task, GS_TLS);
772 else if (doit) { 769 else if (doit)
773 rdmsrl(MSR_KERNEL_GS_BASE, base); 770 rdmsrl(MSR_KERNEL_GS_BASE, base);
774 } else 771 else
775 base = task->thread.gs; 772 base = task->thread.gs;
776 ret = put_user(base, (unsigned long __user *)addr); 773 ret = put_user(base, (unsigned long __user *)addr);
777 break; 774 break;
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index bbf64b59a21e..a87b6cebe80f 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -313,48 +313,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
313 313
314} 314}
315 315
316asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) 316long arch_ptrace(struct task_struct *child, long request, long addr, long data)
317{ 317{
318 struct task_struct *child;
319 long i, ret; 318 long i, ret;
320 unsigned ui; 319 unsigned ui;
321 320
322 /* This lock_kernel fixes a subtle race with suid exec */
323 lock_kernel();
324 ret = -EPERM;
325 if (request == PTRACE_TRACEME) {
326 /* are we already being traced? */
327 if (current->ptrace & PT_PTRACED)
328 goto out;
329 ret = security_ptrace(current->parent, current);
330 if (ret)
331 goto out;
332 /* set the ptrace bit in the process flags. */
333 current->ptrace |= PT_PTRACED;
334 ret = 0;
335 goto out;
336 }
337 ret = -ESRCH;
338 read_lock(&tasklist_lock);
339 child = find_task_by_pid(pid);
340 if (child)
341 get_task_struct(child);
342 read_unlock(&tasklist_lock);
343 if (!child)
344 goto out;
345
346 ret = -EPERM;
347 if (pid == 1) /* you may not mess with init */
348 goto out_tsk;
349
350 if (request == PTRACE_ATTACH) {
351 ret = ptrace_attach(child);
352 goto out_tsk;
353 }
354 ret = ptrace_check_attach(child, request == PTRACE_KILL);
355 if (ret < 0)
356 goto out_tsk;
357
358 switch (request) { 321 switch (request) {
359 /* when I and D space are separate, these will need to be fixed. */ 322 /* when I and D space are separate, these will need to be fixed. */
360 case PTRACE_PEEKTEXT: /* read word at location addr. */ 323 case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -608,10 +571,6 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
608 ret = ptrace_request(child, request, addr, data); 571 ret = ptrace_request(child, request, addr, data);
609 break; 572 break;
610 } 573 }
611out_tsk:
612 put_task_struct(child);
613out:
614 unlock_kernel();
615 return ret; 574 return ret;
616} 575}
617 576
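
The boilerplate deleted above (PTRACE_TRACEME, the task lookup, the pid 1 check) does not disappear: in this series it moves to a generic sys_ptrace() in kernel/ptrace.c, which then calls the arch hook. A sketch of that generic side; the helper names (ptrace_traceme, ptrace_get_task_struct) are assumed from the same patch series:

	asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
	{
		struct task_struct *child;
		long ret;

		lock_kernel();		/* still guards the suid-exec race */
		if (request == PTRACE_TRACEME) {
			ret = ptrace_traceme();
			goto out;
		}

		child = ptrace_get_task_struct(pid);	/* refcounted lookup */
		if (IS_ERR(child)) {
			ret = PTR_ERR(child);
			goto out;
		}

		if (request == PTRACE_ATTACH) {
			ret = ptrace_attach(child);
		} else {
			ret = ptrace_check_attach(child, request == PTRACE_KILL);
			if (ret >= 0)
				ret = arch_ptrace(child, request, addr, data);
		}
		put_task_struct(child);
	out:
		unlock_kernel();
		return ret;
	}
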
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 47f95687905f..75235ed2b31b 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -77,6 +77,7 @@ static inline void kb_wait(void)
77 77
78void machine_shutdown(void) 78void machine_shutdown(void)
79{ 79{
80 unsigned long flags;
80 /* Stop the cpus and apics */ 81 /* Stop the cpus and apics */
81#ifdef CONFIG_SMP 82#ifdef CONFIG_SMP
82 int reboot_cpu_id; 83 int reboot_cpu_id;
@@ -98,7 +99,7 @@ void machine_shutdown(void)
98 smp_send_stop(); 99 smp_send_stop();
99#endif 100#endif
100 101
101 local_irq_disable(); 102 local_irq_save(flags);
102 103
103#ifndef CONFIG_SMP 104#ifndef CONFIG_SMP
104 disable_local_APIC(); 105 disable_local_APIC();
@@ -106,7 +107,7 @@ void machine_shutdown(void)
106 107
107 disable_IO_APIC(); 108 disable_IO_APIC();
108 109
109 local_irq_enable(); 110 local_irq_restore(flags);
110} 111}
111 112
112void machine_emergency_restart(void) 113void machine_emergency_restart(void)
@@ -120,7 +121,7 @@ void machine_emergency_restart(void)
120 /* Could also try the reset bit in the Hammer NB */ 121 /* Could also try the reset bit in the Hammer NB */
121 switch (reboot_type) { 122 switch (reboot_type) {
122 case BOOT_KBD: 123 case BOOT_KBD:
123 for (i=0; i<100; i++) { 124 for (i=0; i<10; i++) {
124 kb_wait(); 125 kb_wait();
125 udelay(50); 126 udelay(50);
126 outb(0xfe,0x64); /* pulse reset low */ 127 outb(0xfe,0x64); /* pulse reset low */
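
The local_irq_save()/local_irq_restore() switch above matters when machine_shutdown() is entered with interrupts already disabled, as on a kexec or crash path: a bare local_irq_enable() would force them back on behind the caller's back. The discipline in miniature:

	unsigned long flags;

	local_irq_save(flags);		/* disable, remembering the prior IF state */
	/* ... disable local APIC and IO-APIC with interrupts off ... */
	local_irq_restore(flags);	/* caller's state back, whatever it was */
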
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 238f73e1a834..750e01dcbdf4 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -60,6 +60,7 @@
60#include <asm/setup.h> 60#include <asm/setup.h>
61#include <asm/mach_apic.h> 61#include <asm/mach_apic.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/sections.h>
63 64
64/* 65/*
65 * Machine setup.. 66 * Machine setup..
@@ -103,7 +104,6 @@ struct edid_info edid_info;
103struct e820map e820; 104struct e820map e820;
104 105
105extern int root_mountflags; 106extern int root_mountflags;
106extern char _text, _etext, _edata, _end;
107 107
108char command_line[COMMAND_LINE_SIZE]; 108char command_line[COMMAND_LINE_SIZE];
109 109
@@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
412{ 412{
413 unsigned long bootmap_size, bootmap; 413 unsigned long bootmap_size, bootmap;
414 414
415 memory_present(0, start_pfn, end_pfn);
416 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 415 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
417 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); 416 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
418 if (bootmap == -1L) 417 if (bootmap == -1L)
@@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p)
571 570
572 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 571 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
573 572
573 zap_low_mappings(0);
574
574#ifdef CONFIG_ACPI 575#ifdef CONFIG_ACPI
575 /* 576 /*
576 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 577 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p)
657 } 658 }
658#endif 659#endif
659 660
660 sparse_init();
661
662 paging_init(); 661 paging_init();
663 662
664 check_ioapic(); 663 check_ioapic();
@@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
793#endif 792#endif
794 793
795 bits = 0; 794 bits = 0;
796 while ((1 << bits) < c->x86_num_cores) 795 while ((1 << bits) < c->x86_max_cores)
797 bits++; 796 bits++;
798 797
799 /* Low order bits define the core id (index of core in socket) */ 798 /* Low order bits define the core id (index of core in socket) */
@@ -823,16 +822,14 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
823 if (!node_online(node)) 822 if (!node_online(node))
824 node = nearby_node(apicid); 823 node = nearby_node(apicid);
825 } 824 }
826 cpu_to_node[cpu] = node; 825 numa_set_node(cpu, node);
827 826
828 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 827 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
829 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 828 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
830#endif 829#endif
831#endif 830#endif
832} 831}
833 832
834#define HWCR 0xc0010015
835
836static int __init init_amd(struct cpuinfo_x86 *c) 833static int __init init_amd(struct cpuinfo_x86 *c)
837{ 834{
838 int r; 835 int r;
@@ -841,14 +838,18 @@ static int __init init_amd(struct cpuinfo_x86 *c)
841#ifdef CONFIG_SMP 838#ifdef CONFIG_SMP
842 unsigned long value; 839 unsigned long value;
843 840
844 // Disable TLB flush filter by setting HWCR.FFDIS: 841 /*
845 // bit 6 of msr C001_0015 842 * Disable TLB flush filter by setting HWCR.FFDIS on K8
846 // 843 * bit 6 of msr C001_0015
847 // Errata 63 for SH-B3 steppings 844 *
848 // Errata 122 for all(?) steppings 845 * Errata 63 for SH-B3 steppings
849 rdmsrl(HWCR, value); 846 * Errata 122 for all steppings (F+ have it disabled by default)
850 value |= 1 << 6; 847 */
851 wrmsrl(HWCR, value); 848 if (c->x86 == 15) {
849 rdmsrl(MSR_K8_HWCR, value);
850 value |= 1 << 6;
851 wrmsrl(MSR_K8_HWCR, value);
852 }
852#endif 853#endif
853 854
854 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 855 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
@@ -873,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
873 display_cacheinfo(c); 874 display_cacheinfo(c);
874 875
875 if (c->extended_cpuid_level >= 0x80000008) { 876 if (c->extended_cpuid_level >= 0x80000008) {
876 c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 877 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
877 if (c->x86_num_cores & (c->x86_num_cores - 1)) 878 if (c->x86_max_cores & (c->x86_max_cores - 1))
878 c->x86_num_cores = 1; 879 c->x86_max_cores = 1;
879 880
880 amd_detect_cmp(c); 881 amd_detect_cmp(c);
881 } 882 }
@@ -887,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
887{ 888{
888#ifdef CONFIG_SMP 889#ifdef CONFIG_SMP
889 u32 eax, ebx, ecx, edx; 890 u32 eax, ebx, ecx, edx;
890 int index_msb, tmp; 891 int index_msb, core_bits;
891 int cpu = smp_processor_id(); 892 int cpu = smp_processor_id();
892 893
894 cpuid(1, &eax, &ebx, &ecx, &edx);
895
896 c->apicid = phys_pkg_id(0);
897
893 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 898 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
894 return; 899 return;
895 900
896 cpuid(1, &eax, &ebx, &ecx, &edx);
897 smp_num_siblings = (ebx & 0xff0000) >> 16; 901 smp_num_siblings = (ebx & 0xff0000) >> 16;
898 902
899 if (smp_num_siblings == 1) { 903 if (smp_num_siblings == 1) {
900 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 904 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
901 } else if (smp_num_siblings > 1) { 905 } else if (smp_num_siblings > 1) {
902 index_msb = 31; 906
903 /*
904 * At this point we only support two siblings per
905 * processor package.
906 */
907 if (smp_num_siblings > NR_CPUS) { 907 if (smp_num_siblings > NR_CPUS) {
908 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings); 908 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings);
909 smp_num_siblings = 1; 909 smp_num_siblings = 1;
910 return; 910 return;
911 } 911 }
912 tmp = smp_num_siblings; 912
913 while ((tmp & 0x80000000 ) == 0) { 913 index_msb = get_count_order(smp_num_siblings);
914 tmp <<=1 ;
915 index_msb--;
916 }
917 if (smp_num_siblings & (smp_num_siblings - 1))
918 index_msb++;
919 phys_proc_id[cpu] = phys_pkg_id(index_msb); 914 phys_proc_id[cpu] = phys_pkg_id(index_msb);
920 915
921 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 916 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
922 phys_proc_id[cpu]); 917 phys_proc_id[cpu]);
923 918
924 smp_num_siblings = smp_num_siblings / c->x86_num_cores; 919 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
925 920
926 tmp = smp_num_siblings; 921 index_msb = get_count_order(smp_num_siblings);
927 index_msb = 31; 922
928 while ((tmp & 0x80000000) == 0) { 923 core_bits = get_count_order(c->x86_max_cores);
929 tmp <<=1 ;
930 index_msb--;
931 }
932 if (smp_num_siblings & (smp_num_siblings - 1))
933 index_msb++;
934 924
935 cpu_core_id[cpu] = phys_pkg_id(index_msb); 925 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
926 ((1 << core_bits) - 1);
936 927
937 if (c->x86_num_cores > 1) 928 if (c->x86_max_cores > 1)
938 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 929 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
939 cpu_core_id[cpu]); 930 cpu_core_id[cpu]);
940 } 931 }
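
The rewrite above replaces two open-coded find-the-MSB loops with get_count_order(), i.e. the number of low initial-APIC-id bits a field of n entries occupies (ceil(log2(n))). A minimal user-space sketch of the same id extraction, with a portable reimplementation of get_count_order() and an assumed 2-core/2-thread package (the APIC id is hypothetical):

#include <stdio.h>

/* Portable equivalent of the kernel's get_count_order() for count >= 1:
 * the smallest order such that (1 << order) >= count. */
static int get_count_order(unsigned int count)
{
        int order = 0;

        while ((1U << order) < count)
                order++;
        return order;
}

int main(void)
{
        unsigned int siblings = 4, max_cores = 2;   /* assumed topology */
        unsigned int apicid = 0x7;                  /* hypothetical APIC id */

        int pkg_bits    = get_count_order(siblings);
        int thread_bits = get_count_order(siblings / max_cores);
        int core_bits   = get_count_order(max_cores);

        printf("package id: %u\n", apicid >> pkg_bits);
        printf("core id   : %u\n",
               (apicid >> thread_bits) & ((1U << core_bits) - 1));
        return 0;
}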
@@ -965,16 +956,15 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
965static void srat_detect_node(void) 956static void srat_detect_node(void)
966{ 957{
967#ifdef CONFIG_NUMA 958#ifdef CONFIG_NUMA
968 unsigned apicid, node; 959 unsigned node;
969 int cpu = smp_processor_id(); 960 int cpu = smp_processor_id();
970 961
971 /* Don't do the funky fallback heuristics the AMD version employs 962 /* Don't do the funky fallback heuristics the AMD version employs
972 for now. */ 963 for now. */
973 apicid = phys_proc_id[cpu]; 964 node = apicid_to_node[hard_smp_processor_id()];
974 node = apicid_to_node[apicid];
975 if (node == NUMA_NO_NODE) 965 if (node == NUMA_NO_NODE)
976 node = 0; 966 node = 0;
977 cpu_to_node[cpu] = node; 967 numa_set_node(cpu, node);
978 968
979 if (acpi_numa > 0) 969 if (acpi_numa > 0)
980 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); 970 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
@@ -992,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
992 unsigned eax = cpuid_eax(0x80000008); 982 unsigned eax = cpuid_eax(0x80000008);
993 c->x86_virt_bits = (eax >> 8) & 0xff; 983 c->x86_virt_bits = (eax >> 8) & 0xff;
994 c->x86_phys_bits = eax & 0xff; 984 c->x86_phys_bits = eax & 0xff;
985 /* CPUID workaround for Intel 0F34 CPU */
986 if (c->x86_vendor == X86_VENDOR_INTEL &&
987 c->x86 == 0xF && c->x86_model == 0x3 &&
988 c->x86_mask == 0x4)
989 c->x86_phys_bits = 36;
995 } 990 }
996 991
997 if (c->x86 == 15) 992 if (c->x86 == 15)
998 c->x86_cache_alignment = c->x86_clflush_size * 2; 993 c->x86_cache_alignment = c->x86_clflush_size * 2;
999 if (c->x86 >= 15) 994 if (c->x86 >= 15)
1000 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 995 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1001 c->x86_num_cores = intel_num_cpu_cores(c); 996 c->x86_max_cores = intel_num_cpu_cores(c);
1002 997
1003 srat_detect_node(); 998 srat_detect_node();
1004} 999}
@@ -1036,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1036 c->x86_model_id[0] = '\0'; /* Unset */ 1031 c->x86_model_id[0] = '\0'; /* Unset */
1037 c->x86_clflush_size = 64; 1032 c->x86_clflush_size = 64;
1038 c->x86_cache_alignment = c->x86_clflush_size; 1033 c->x86_cache_alignment = c->x86_clflush_size;
1039 c->x86_num_cores = 1; 1034 c->x86_max_cores = 1;
1040 c->extended_cpuid_level = 0; 1035 c->extended_cpuid_level = 0;
1041 memset(&c->x86_capability, 0, sizeof c->x86_capability); 1036 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1042 1037
@@ -1059,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1059 c->x86 = (tfms >> 8) & 0xf; 1054 c->x86 = (tfms >> 8) & 0xf;
1060 c->x86_model = (tfms >> 4) & 0xf; 1055 c->x86_model = (tfms >> 4) & 0xf;
1061 c->x86_mask = tfms & 0xf; 1056 c->x86_mask = tfms & 0xf;
1062 if (c->x86 == 0xf) { 1057 if (c->x86 == 0xf)
1063 c->x86 += (tfms >> 20) & 0xff; 1058 c->x86 += (tfms >> 20) & 0xff;
1059 if (c->x86 >= 0x6)
1064 c->x86_model += ((tfms >> 16) & 0xF) << 4; 1060 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1065 }
1066 if (c->x86_capability[0] & (1<<19)) 1061 if (c->x86_capability[0] & (1<<19))
1067 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 1062 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1068 } else { 1063 } else {
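
The restructured family/model decode above keeps the extended-family byte specific to family 0xf but now applies the extended-model nibble to any augmented family of 6 or above, not just family 0xf. A stand-alone sketch of that rule; the two signatures are illustrative, and 0x00000f34 matches the Intel 0F34 part handled by the physical-address-bits workaround earlier in this patch:

#include <stdio.h>

/* Decode display family/model from CPUID leaf 1 EAX ("tfms"),
 * following the patched early_identify_cpu() logic. */
static void decode_fms(unsigned int tfms)
{
        unsigned int family = (tfms >> 8) & 0xf;
        unsigned int model  = (tfms >> 4) & 0xf;

        if (family == 0xf)              /* extended family: family 0xf only */
                family += (tfms >> 20) & 0xff;
        if (family >= 0x6)              /* extended model: family 6 and up */
                model += ((tfms >> 16) & 0xf) << 4;

        printf("family 0x%x, model 0x%x, stepping 0x%x\n",
               family, model, tfms & 0xf);
}

int main(void)
{
        decode_fms(0x00000f34);   /* family 0xf, model 0x3, stepping 4 */
        decode_fms(0x000106a5);   /* family 0x6, model 0x1a, stepping 5 */
        return 0;
}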
@@ -1212,7 +1207,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1212 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1207 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1213 1208
1214 /* Intel-defined (#2) */ 1209 /* Intel-defined (#2) */
1215 "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", 1210 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1216 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, 1211 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1217 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1212 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1218 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1213 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1270,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1270 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 1265 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1271 1266
1272#ifdef CONFIG_SMP 1267#ifdef CONFIG_SMP
1273 if (smp_num_siblings * c->x86_num_cores > 1) { 1268 if (smp_num_siblings * c->x86_max_cores > 1) {
1274 int cpu = c - cpu_data; 1269 int cpu = c - cpu_data;
1275 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1270 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1276 seq_printf(m, "siblings\t: %d\n", 1271 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1277 c->x86_num_cores * smp_num_siblings);
1278 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1272 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1279 seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 1273 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1280 } 1274 }
1281#endif 1275#endif
1282 1276
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index bd33be24a386..06dc354375c3 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -87,6 +87,10 @@ void __init setup_per_cpu_areas(void)
87 int i; 87 int i;
88 unsigned long size; 88 unsigned long size;
89 89
90#ifdef CONFIG_HOTPLUG_CPU
91 prefill_possible_map();
92#endif
93
90 /* Copy section for each CPU (we discard the original) */ 94 /* Copy section for each CPU (we discard the original) */
91 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); 95 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
92#ifdef CONFIG_MODULES 96#ifdef CONFIG_MODULES
@@ -137,7 +141,6 @@ void pda_init(int cpu)
137 panic("cannot allocate irqstack for cpu %d", cpu); 141 panic("cannot allocate irqstack for cpu %d", cpu);
138 } 142 }
139 143
140 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
141 144
142 pda->irqstackptr += IRQSTACKSIZE-64; 145 pda->irqstackptr += IRQSTACKSIZE-64;
143} 146}
@@ -193,6 +196,7 @@ void __cpuinit cpu_init (void)
193 /* CPU 0 is initialised in head64.c */ 196 /* CPU 0 is initialised in head64.c */
194 if (cpu != 0) { 197 if (cpu != 0) {
195 pda_init(cpu); 198 pda_init(cpu);
199 zap_low_mappings(cpu);
196 } else 200 } else
197 estacks = boot_exception_stacks; 201 estacks = boot_exception_stacks;
198 202
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index d642fbf3da29..5876df116c92 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
110 COPY(r14); 110 COPY(r14);
111 COPY(r15); 111 COPY(r15);
112 112
113 /* Kernel saves and restores only the CS segment register on signals,
114 * which is the bare minimum needed to allow mixed 32/64-bit code.
115 * App's signal handler can save/restore other segments if needed. */
116 {
117 unsigned cs;
118 err |= __get_user(cs, &sc->cs);
119 regs->cs = cs | 3; /* Force into user mode */
120 }
121
113 { 122 {
114 unsigned int tmpflags; 123 unsigned int tmpflags;
115 err |= __get_user(tmpflags, &sc->eflags); 124 err |= __get_user(tmpflags, &sc->eflags);
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
187{ 196{
188 int err = 0; 197 int err = 0;
189 198
199 err |= __put_user(regs->cs, &sc->cs);
190 err |= __put_user(0, &sc->gs); 200 err |= __put_user(0, &sc->gs);
191 err |= __put_user(0, &sc->fs); 201 err |= __put_user(0, &sc->fs);
192 202
@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
318 328
319 regs->rsp = (unsigned long)frame; 329 regs->rsp = (unsigned long)frame;
320 330
331 /* Set up the CS register to run signal handlers in 64-bit mode,
332 even if the handler happens to be interrupting 32-bit code. */
333 regs->cs = __USER_CS;
334
335 /* This, by contrast, has nothing to do with segment registers -
336 see include/asm-x86_64/uaccess.h for details. */
321 set_fs(USER_DS); 337 set_fs(USER_DS);
338
322 regs->eflags &= ~TF_MASK; 339 regs->eflags &= ~TF_MASK;
323 if (test_thread_flag(TIF_SINGLESTEP)) 340 if (test_thread_flag(TIF_SINGLESTEP))
324 ptrace_notify(SIGTRAP); 341 ptrace_notify(SIGTRAP);
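
Both signal.c hunks rely on the low two bits of a code-segment selector being the requested privilege level (RPL). A one-line sketch of the sanitization restore_sigcontext() now applies; the 0x10 selector is hypothetical:

#include <stdio.h>

/* OR-ing 3 into a selector pins its RPL to ring 3, so sigreturn can
 * never hand the kernel back a privileged CS. */
static unsigned int force_user_rpl(unsigned int sel)
{
        return sel | 3;
}

int main(void)
{
        printf("0x%x -> 0x%x\n", 0x10, force_user_rpl(0x10));
        return 0;
}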
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 9db9dda161b4..cfc3d9dccbd9 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -28,8 +28,6 @@
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <asm/apicdef.h> 29#include <asm/apicdef.h>
30 30
31#define __cpuinit __init
32
33/* 31/*
34 * Smarter SMP flushing macros. 32 * Smarter SMP flushing macros.
35 * c/o Linus Torvalds. 33 * c/o Linus Torvalds.
@@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
452 450
453void smp_stop_cpu(void) 451void smp_stop_cpu(void)
454{ 452{
453 unsigned long flags;
455 /* 454 /*
456 * Remove this CPU: 455 * Remove this CPU:
457 */ 456 */
458 cpu_clear(smp_processor_id(), cpu_online_map); 457 cpu_clear(smp_processor_id(), cpu_online_map);
459 local_irq_disable(); 458 local_irq_save(flags);
460 disable_local_APIC(); 459 disable_local_APIC();
461 local_irq_enable(); 460 local_irq_restore(flags);
462} 461}
463 462
464static void smp_really_stop_cpu(void *dummy) 463static void smp_really_stop_cpu(void *dummy)
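
Replacing local_irq_disable()/local_irq_enable() with save/restore matters when smp_stop_cpu() is entered with interrupts already disabled (a panic path, for instance): restoring the saved flags leaves them off rather than force-enabling them. A runnable user-space analogue, using the signal mask in place of the interrupt flag:

#include <signal.h>
#include <stdio.h>

/* Analogue of local_irq_save()/local_irq_restore(): block SIGINT,
 * do the critical work, then restore the *previous* mask instead of
 * unconditionally unblocking. If the caller already had SIGINT
 * blocked, it stays blocked afterwards. */
static void critical_section(void)
{
        sigset_t block, saved;

        sigemptyset(&block);
        sigaddset(&block, SIGINT);
        sigprocmask(SIG_BLOCK, &block, &saved);    /* "local_irq_save"    */
        puts("working with SIGINT blocked");
        sigprocmask(SIG_SETMASK, &saved, NULL);    /* "local_irq_restore" */
}

int main(void)
{
        critical_section();
        return 0;
}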
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index e12d7baeb33e..683c33f7b967 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -64,9 +64,8 @@
64int smp_num_siblings = 1; 64int smp_num_siblings = 1;
65/* Package ID of each logical CPU */ 65/* Package ID of each logical CPU */
66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
67/* core ID of each logical CPU */
67u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 68u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68EXPORT_SYMBOL(phys_proc_id);
69EXPORT_SYMBOL(cpu_core_id);
70 69
71/* Bitmask of currently online CPUs */ 70/* Bitmask of currently online CPUs */
72cpumask_t cpu_online_map __read_mostly; 71cpumask_t cpu_online_map __read_mostly;
@@ -89,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
89/* Set when the idlers are all forked */ 88/* Set when the idlers are all forked */
90int smp_threads_ready; 89int smp_threads_ready;
91 90
91/* representing HT siblings of each logical CPU */
92cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 92cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
93
94/* representing HT and core siblings of each logical CPU */
93cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 95cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
94EXPORT_SYMBOL(cpu_core_map); 96EXPORT_SYMBOL(cpu_core_map);
95 97
@@ -436,30 +438,59 @@ void __cpuinit smp_callin(void)
436 cpu_set(cpuid, cpu_callin_map); 438 cpu_set(cpuid, cpu_callin_map);
437} 439}
438 440
441/* representing cpus for which sibling maps can be computed */
442static cpumask_t cpu_sibling_setup_map;
443
439static inline void set_cpu_sibling_map(int cpu) 444static inline void set_cpu_sibling_map(int cpu)
440{ 445{
441 int i; 446 int i;
447 struct cpuinfo_x86 *c = cpu_data;
448
449 cpu_set(cpu, cpu_sibling_setup_map);
442 450
443 if (smp_num_siblings > 1) { 451 if (smp_num_siblings > 1) {
444 for_each_cpu(i) { 452 for_each_cpu_mask(i, cpu_sibling_setup_map) {
445 if (cpu_core_id[cpu] == cpu_core_id[i]) { 453 if (phys_proc_id[cpu] == phys_proc_id[i] &&
454 cpu_core_id[cpu] == cpu_core_id[i]) {
446 cpu_set(i, cpu_sibling_map[cpu]); 455 cpu_set(i, cpu_sibling_map[cpu]);
447 cpu_set(cpu, cpu_sibling_map[i]); 456 cpu_set(cpu, cpu_sibling_map[i]);
457 cpu_set(i, cpu_core_map[cpu]);
458 cpu_set(cpu, cpu_core_map[i]);
448 } 459 }
449 } 460 }
450 } else { 461 } else {
451 cpu_set(cpu, cpu_sibling_map[cpu]); 462 cpu_set(cpu, cpu_sibling_map[cpu]);
452 } 463 }
453 464
454 if (current_cpu_data.x86_num_cores > 1) { 465 if (current_cpu_data.x86_max_cores == 1) {
455 for_each_cpu(i) {
456 if (phys_proc_id[cpu] == phys_proc_id[i]) {
457 cpu_set(i, cpu_core_map[cpu]);
458 cpu_set(cpu, cpu_core_map[i]);
459 }
460 }
461 } else {
462 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 466 cpu_core_map[cpu] = cpu_sibling_map[cpu];
467 c[cpu].booted_cores = 1;
468 return;
469 }
470
471 for_each_cpu_mask(i, cpu_sibling_setup_map) {
472 if (phys_proc_id[cpu] == phys_proc_id[i]) {
473 cpu_set(i, cpu_core_map[cpu]);
474 cpu_set(cpu, cpu_core_map[i]);
475 /*
476 * Does this new cpu bring up a new core?
477 */
478 if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
479 /*
480 * for each core in package, increment
481 * the booted_cores for this new cpu
482 */
483 if (first_cpu(cpu_sibling_map[i]) == i)
484 c[cpu].booted_cores++;
485 /*
486 * increment the core count for all
487 * the other cpus in this package
488 */
489 if (i != cpu)
490 c[i].booted_cores++;
491 } else if (i != cpu && !c[cpu].booted_cores)
492 c[cpu].booted_cores = c[i].booted_cores;
493 }
463 } 494 }
464} 495}
465 496
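
The booted_cores bookkeeping introduced above maintains, for every online CPU, how many distinct cores in its package already have at least one thread up: the first thread of a core to arrive increments the counters, later threads copy the value. A small simulation of that invariant over an assumed 2-package, 2-core, 2-thread box:

#include <stdio.h>

#define NCPUS 8

/* Hypothetical topology: 2 packages x 2 cores x 2 threads. */
static const int pkg[NCPUS]  = {0, 0, 0, 0, 1, 1, 1, 1};
static const int core[NCPUS] = {0, 0, 1, 1, 0, 0, 1, 1};

/* booted_cores for a package == number of distinct cores with at
 * least one online thread; the kernel keeps this incrementally in
 * set_cpu_sibling_map()/remove_siblinginfo(). */
static int booted_cores(int p, const int *online)
{
        int count = 0;

        for (int i = 0; i < NCPUS; i++) {
                if (!online[i] || pkg[i] != p)
                        continue;
                int first = 1;   /* first online thread of its core? */
                for (int j = 0; j < i; j++)
                        if (online[j] && pkg[j] == p && core[j] == core[i])
                                first = 0;
                if (first)
                        count++;
        }
        return count;
}

int main(void)
{
        int online[NCPUS] = {1, 1, 1, 0, 1, 0, 0, 0};

        printf("package 0: booted_cores = %d\n", booted_cores(0, online));
        printf("package 1: booted_cores = %d\n", booted_cores(1, online));
        return 0;
}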
@@ -474,6 +505,7 @@ void __cpuinit start_secondary(void)
474 * things done here to the most necessary things. 505 * things done here to the most necessary things.
475 */ 506 */
476 cpu_init(); 507 cpu_init();
508 preempt_disable();
477 smp_callin(); 509 smp_callin();
478 510
479 /* otherwise gcc will move up the smp_processor_id before the cpu_init */ 511 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
@@ -880,6 +912,9 @@ static __init void disable_smp(void)
880} 912}
881 913
882#ifdef CONFIG_HOTPLUG_CPU 914#ifdef CONFIG_HOTPLUG_CPU
915
916int additional_cpus __initdata = -1;
917
883/* 918/*
884 * cpu_possible_map should be static, it cannot change as cpu's 919 * cpu_possible_map should be static, it cannot change as cpu's
885 * are onlined, or offlined. The reason is per-cpu data-structures 920 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -888,14 +923,38 @@ static __init void disable_smp(void)
888 * cpu_present_map on the other hand can change dynamically. 923 * cpu_present_map on the other hand can change dynamically.
889 * In case when cpu_hotplug is not compiled, then we resort to current 924 * In case when cpu_hotplug is not compiled, then we resort to current
890 * behaviour, which is cpu_possible == cpu_present. 925 * behaviour, which is cpu_possible == cpu_present.
891 * If cpu-hotplug is supported, then we need to preallocate for all
892 * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
893 * - Ashok Raj 926 * - Ashok Raj
927 *
928 * Three ways to find out the number of additional hotplug CPUs:
929 * - If the BIOS specified disabled CPUs in ACPI/mptables, use that.
930 * - Otherwise use half of the available CPUs, or 2, whichever is more.
931 * - The user can override it with additional_cpus=NUM.
932 * We do this because additional CPUs waste a lot of memory.
933 * -AK
894 */ 934 */
895static void prefill_possible_map(void) 935__init void prefill_possible_map(void)
896{ 936{
897 int i; 937 int i;
898 for (i = 0; i < NR_CPUS; i++) 938 int possible;
939
940 if (additional_cpus == -1) {
941 if (disabled_cpus > 0) {
942 additional_cpus = disabled_cpus;
943 } else {
944 additional_cpus = num_processors / 2;
945 if (additional_cpus == 0)
946 additional_cpus = 2;
947 }
948 }
949 possible = num_processors + additional_cpus;
950 if (possible > NR_CPUS)
951 possible = NR_CPUS;
952
953 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
954 possible,
955 max_t(int, possible - num_processors, 0));
956
957 for (i = 0; i < possible; i++)
899 cpu_set(i, cpu_possible_map); 958 cpu_set(i, cpu_possible_map);
900} 959}
901#endif 960#endif
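
A directly runnable model of the sizing heuristic spelled out in the comment; NR_CPUS and the sample inputs are illustrative:

#include <stdio.h>

#define NR_CPUS 32

/* additional_cpus == -1 means "not given on the command line". */
static int compute_possible(int num_processors, int disabled_cpus,
                            int additional_cpus)
{
        if (additional_cpus == -1) {
                if (disabled_cpus > 0) {
                        additional_cpus = disabled_cpus;
                } else {
                        additional_cpus = num_processors / 2;
                        if (additional_cpus == 0)
                                additional_cpus = 2;
                }
        }

        int possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;
        return possible;
}

int main(void)
{
        printf("%d\n", compute_possible(4, 0, -1));   /* 4 + 4/2      = 6 */
        printf("%d\n", compute_possible(2, 6, -1));   /* 2 + disabled = 8 */
        printf("%d\n", compute_possible(1, 0, -1));   /* 1 + min 2    = 3 */
        return 0;
}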
@@ -966,10 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
966 nmi_watchdog_default(); 1025 nmi_watchdog_default();
967 current_cpu_data = boot_cpu_data; 1026 current_cpu_data = boot_cpu_data;
968 current_thread_info()->cpu = 0; /* needed? */ 1027 current_thread_info()->cpu = 0; /* needed? */
969 1028 set_cpu_sibling_map(0);
970#ifdef CONFIG_HOTPLUG_CPU
971 prefill_possible_map();
972#endif
973 1029
974 if (smp_sanity_check(max_cpus) < 0) { 1030 if (smp_sanity_check(max_cpus) < 0) {
975 printk(KERN_INFO "SMP disabled\n"); 1031 printk(KERN_INFO "SMP disabled\n");
@@ -1013,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void)
1013 int me = smp_processor_id(); 1069 int me = smp_processor_id();
1014 cpu_set(me, cpu_online_map); 1070 cpu_set(me, cpu_online_map);
1015 cpu_set(me, cpu_callout_map); 1071 cpu_set(me, cpu_callout_map);
1016 cpu_set(0, cpu_sibling_map[0]);
1017 cpu_set(0, cpu_core_map[0]);
1018 per_cpu(cpu_state, me) = CPU_ONLINE; 1072 per_cpu(cpu_state, me) = CPU_ONLINE;
1019} 1073}
1020 1074
@@ -1067,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
1067 */ 1121 */
1068void __init smp_cpus_done(unsigned int max_cpus) 1122void __init smp_cpus_done(unsigned int max_cpus)
1069{ 1123{
1070#ifndef CONFIG_HOTPLUG_CPU
1071 zap_low_mappings();
1072#endif
1073 smp_cleanup_boot(); 1124 smp_cleanup_boot();
1074 1125
1075#ifdef CONFIG_X86_IO_APIC 1126#ifdef CONFIG_X86_IO_APIC
@@ -1086,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus)
1086static void remove_siblinginfo(int cpu) 1137static void remove_siblinginfo(int cpu)
1087{ 1138{
1088 int sibling; 1139 int sibling;
1140 struct cpuinfo_x86 *c = cpu_data;
1089 1141
1142 for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
1143 cpu_clear(cpu, cpu_core_map[sibling]);
1144 /*
1145 * last thread sibling in this cpu core going down
1146 */
1147 if (cpus_weight(cpu_sibling_map[cpu]) == 1)
1148 c[sibling].booted_cores--;
1149 }
1150
1090 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 1151 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1091 cpu_clear(cpu, cpu_sibling_map[sibling]); 1152 cpu_clear(cpu, cpu_sibling_map[sibling]);
1092 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1093 cpu_clear(cpu, cpu_core_map[sibling]);
1094 cpus_clear(cpu_sibling_map[cpu]); 1153 cpus_clear(cpu_sibling_map[cpu]);
1095 cpus_clear(cpu_core_map[cpu]); 1154 cpus_clear(cpu_core_map[cpu]);
1096 phys_proc_id[cpu] = BAD_APICID; 1155 phys_proc_id[cpu] = BAD_APICID;
1097 cpu_core_id[cpu] = BAD_APICID; 1156 cpu_core_id[cpu] = BAD_APICID;
1157 cpu_clear(cpu, cpu_sibling_setup_map);
1098} 1158}
1099 1159
1100void remove_cpu_from_maps(void) 1160void remove_cpu_from_maps(void)
@@ -1158,6 +1218,12 @@ void __cpu_die(unsigned int cpu)
1158 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1218 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1159} 1219}
1160 1220
1221static __init int setup_additional_cpus(char *s)
1222{
1223 return get_option(&s, &additional_cpus);
1224}
1225__setup("additional_cpus=", setup_additional_cpus);
1226
1161#else /* ... !CONFIG_HOTPLUG_CPU */ 1227#else /* ... !CONFIG_HOTPLUG_CPU */
1162 1228
1163int __cpu_disable(void) 1229int __cpu_disable(void)
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index ebb9abf3ce6d..fd2bef780882 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -11,6 +11,8 @@
11#include <linux/smp.h> 11#include <linux/smp.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <asm/proto.h> 13#include <asm/proto.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
14 16
15struct saved_context saved_context; 17struct saved_context saved_context;
16 18
@@ -61,13 +63,12 @@ void save_processor_state(void)
61 __save_processor_state(&saved_context); 63 __save_processor_state(&saved_context);
62} 64}
63 65
64static void 66static void do_fpu_end(void)
65do_fpu_end(void)
66{ 67{
67 /* restore FPU regs if necessary */ 68 /*
68 /* Do it out of line so that gcc does not move cr0 load to some stupid place */ 69 * Restore FPU regs if necessary
69 kernel_fpu_end(); 70 */
70 mxcsr_feature_mask_init(); 71 kernel_fpu_end();
71} 72}
72 73
73void __restore_processor_state(struct saved_context *ctxt) 74void __restore_processor_state(struct saved_context *ctxt)
@@ -140,4 +141,83 @@ void fix_processor_context(void)
140 141
141} 142}
142 143
144#ifdef CONFIG_SOFTWARE_SUSPEND
145/* Defined in arch/x86_64/kernel/suspend_asm.S */
146extern int restore_image(void);
147
148pgd_t *temp_level4_pgt;
149
150static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
151{
152 long i, j;
153
154 i = pud_index(address);
155 pud = pud + i;
156 for (; i < PTRS_PER_PUD; pud++, i++) {
157 unsigned long paddr;
158 pmd_t *pmd;
159
160 paddr = address + i*PUD_SIZE;
161 if (paddr >= end)
162 break;
163
164 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
165 if (!pmd)
166 return -ENOMEM;
167 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
168 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
169 unsigned long pe;
170
171 if (paddr >= end)
172 break;
173 pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
174 pe &= __supported_pte_mask;
175 set_pmd(pmd, __pmd(pe));
176 }
177 }
178 return 0;
179}
180
181static int set_up_temporary_mappings(void)
182{
183 unsigned long start, end, next;
184 int error;
185
186 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
187 if (!temp_level4_pgt)
188 return -ENOMEM;
189
190 /* It is safe to reuse the original kernel mapping */
191 set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
192 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
193
194 /* Set up the direct mapping from scratch */
195 start = (unsigned long)pfn_to_kaddr(0);
196 end = (unsigned long)pfn_to_kaddr(end_pfn);
197
198 for (; start < end; start = next) {
199 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
200 if (!pud)
201 return -ENOMEM;
202 next = start + PGDIR_SIZE;
203 if (next > end)
204 next = end;
205 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
206 return error;
207 set_pgd(temp_level4_pgt + pgd_index(start),
208 mk_kernel_pgd(__pa(pud)));
209 }
210 return 0;
211}
212
213int swsusp_arch_resume(void)
214{
215 int error;
143 216
217 /* We have got enough memory and from now on we cannot recover */
218 if ((error = set_up_temporary_mappings()))
219 return error;
220 restore_image();
221 return 0;
222}
223#endif /* CONFIG_SOFTWARE_SUSPEND */
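
set_up_temporary_mappings() direct-maps RAM with 2MB (PSE) pages: one page of PMD entries per GB, one page of PUD entries per 512GB, one PGD slot per PUD page. A quick check of that geometry, assuming the standard x86-64 4K-page, 4-level constants:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PTRS_PER_TABLE  512UL

int main(void)
{
        unsigned long pmd_size = 1UL << (PAGE_SHIFT + 9);     /* 2MB   */
        unsigned long pud_size = pmd_size * PTRS_PER_TABLE;   /* 1GB   */
        unsigned long pgd_size = pud_size * PTRS_PER_TABLE;   /* 512GB */

        printf("PMD entry %lu MB, PUD entry %lu GB, PGD entry %lu GB\n",
               pmd_size >> 20, pud_size >> 30, pgd_size >> 30);
        /* So each get_safe_page() spent on a PUD table in the resume
         * loop covers up to 512GB of direct mapping. */
        return 0;
}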
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index 4d659e97df10..320b6fb00cca 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -39,12 +39,13 @@ ENTRY(swsusp_arch_suspend)
39 call swsusp_save 39 call swsusp_save
40 ret 40 ret
41 41
42ENTRY(swsusp_arch_resume) 42ENTRY(restore_image)
43 /* set up cr3 */ 43 /* switch to temporary page tables */
44 leaq init_level4_pgt(%rip),%rax 44 movq $__PAGE_OFFSET, %rdx
45 subq $__START_KERNEL_map,%rax 45 movq temp_level4_pgt(%rip), %rax
46 movq %rax,%cr3 46 subq %rdx, %rax
47 47 movq %rax, %cr3
48 /* Flush TLB */
48 movq mmu_cr4_features(%rip), %rax 49 movq mmu_cr4_features(%rip), %rax
49 movq %rax, %rdx 50 movq %rax, %rdx
50 andq $~(1<<7), %rdx # PGE 51 andq $~(1<<7), %rdx # PGE
@@ -69,6 +70,10 @@ loop:
69 movq pbe_next(%rdx), %rdx 70 movq pbe_next(%rdx), %rdx
70 jmp loop 71 jmp loop
71done: 72done:
73 /* go back to the original page tables */
74 leaq init_level4_pgt(%rip), %rax
75 subq $__START_KERNEL_map, %rax
76 movq %rax, %cr3
72 /* Flush TLB, including "global" things (vmalloc) */ 77 /* Flush TLB, including "global" things (vmalloc) */
73 movq mmu_cr4_features(%rip), %rax 78 movq mmu_cr4_features(%rip), %rax
74 movq %rax, %rdx 79 movq %rax, %rdx
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index cc7821c68851..6449ea8fe756 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
154 err |= copy_to_user(&name->machine, "i686", 5); 154 err |= copy_to_user(&name->machine, "i686", 5);
155 return err ? -EFAULT : 0; 155 return err ? -EFAULT : 0;
156} 156}
157
158asmlinkage long sys_time64(long __user * tloc)
159{
160 struct timeval now;
161 int i;
162
163 do_gettimeofday(&now);
164 i = now.tv_sec;
165 if (tloc) {
166 if (put_user(i,tloc))
167 i = -EFAULT;
168 }
169 return i;
170}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 2373cb8b8625..fdaddc4e5284 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -42,10 +42,6 @@
42#include <asm/apic.h> 42#include <asm/apic.h>
43#endif 43#endif
44 44
45u64 jiffies_64 = INITIAL_JIFFIES;
46
47EXPORT_SYMBOL(jiffies_64);
48
49#ifdef CONFIG_CPU_FREQ 45#ifdef CONFIG_CPU_FREQ
50static void cpufreq_delayed_get(void); 46static void cpufreq_delayed_get(void);
51#endif 47#endif
@@ -481,9 +477,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
481static unsigned int cyc2ns_scale; 477static unsigned int cyc2ns_scale;
482#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 478#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
483 479
484static inline void set_cyc2ns_scale(unsigned long cpu_mhz) 480static inline void set_cyc2ns_scale(unsigned long cpu_khz)
485{ 481{
486 cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; 482 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
487} 483}
488 484
489static inline unsigned long long cycles_2_ns(unsigned long long cyc) 485static inline unsigned long long cycles_2_ns(unsigned long long cyc)
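
Feeding set_cyc2ns_scale() kHz instead of MHz preserves sub-MHz clock precision in the 2^10 fixed-point scale. A runnable check of the conversion round trip; the cpu_khz value is illustrative:

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10   /* 2^10 fixed point, as in time.c */

int main(void)
{
        unsigned long cpu_khz = 2394016;             /* illustrative 2.394 GHz */
        unsigned int scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
        unsigned long long cycles = 2394016000ULL;   /* about one second */

        /* cycles_2_ns(): (cyc * scale) >> CYC2NS_SCALE_FACTOR */
        unsigned long long ns = (cycles * scale) >> CYC2NS_SCALE_FACTOR;

        printf("scale=%u: %llu cycles -> %llu ns\n", scale, cycles, ns);
        return 0;
}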
@@ -655,7 +651,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
655 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 651 vxtime.tsc_quot = (1000L << 32) / cpu_khz;
656 } 652 }
657 653
658 set_cyc2ns_scale(cpu_khz_ref / 1000); 654 set_cyc2ns_scale(cpu_khz_ref);
659 655
660 return 0; 656 return 0;
661} 657}
@@ -939,7 +935,7 @@ void __init time_init(void)
939 rdtscll_sync(&vxtime.last_tsc); 935 rdtscll_sync(&vxtime.last_tsc);
940 setup_irq(0, &irq0); 936 setup_irq(0, &irq0);
941 937
942 set_cyc2ns_scale(cpu_khz / 1000); 938 set_cyc2ns_scale(cpu_khz);
943 939
944#ifndef CONFIG_SMP 940#ifndef CONFIG_SMP
945 time_init_gtod(); 941 time_init_gtod();
@@ -959,9 +955,6 @@ static __init int unsynchronized_tsc(void)
959 are handled in the OEM check above. */ 955 are handled in the OEM check above. */
960 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 956 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
961 return 0; 957 return 0;
962 /* All in a single socket - should be synchronized */
963 if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
964 return 0;
965#endif 958#endif
966 /* Assume multi socket systems are not synchronized */ 959 /* Assume multi socket systems are not synchronized */
967 return num_online_cpus() > 1; 960 return num_online_cpus() > 1;
@@ -1096,6 +1089,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
1096static unsigned long PIE_count; 1089static unsigned long PIE_count;
1097 1090
1098static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ 1091static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
1092static unsigned int hpet_t1_cmp; /* cached comparator register */
1099 1093
1100int is_hpet_enabled(void) 1094int is_hpet_enabled(void)
1101{ 1095{
@@ -1132,10 +1126,12 @@ int hpet_rtc_timer_init(void)
1132 cnt = hpet_readl(HPET_COUNTER); 1126 cnt = hpet_readl(HPET_COUNTER);
1133 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); 1127 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
1134 hpet_writel(cnt, HPET_T1_CMP); 1128 hpet_writel(cnt, HPET_T1_CMP);
1129 hpet_t1_cmp = cnt;
1135 local_irq_restore(flags); 1130 local_irq_restore(flags);
1136 1131
1137 cfg = hpet_readl(HPET_T1_CFG); 1132 cfg = hpet_readl(HPET_T1_CFG);
1138 cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; 1133 cfg &= ~HPET_TN_PERIODIC;
1134 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
1139 hpet_writel(cfg, HPET_T1_CFG); 1135 hpet_writel(cfg, HPET_T1_CFG);
1140 1136
1141 return 1; 1137 return 1;
@@ -1145,8 +1141,12 @@ static void hpet_rtc_timer_reinit(void)
1145{ 1141{
1146 unsigned int cfg, cnt; 1142 unsigned int cfg, cnt;
1147 1143
1148 if (!(PIE_on | AIE_on | UIE_on)) 1144 if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
1145 cfg = hpet_readl(HPET_T1_CFG);
1146 cfg &= ~HPET_TN_ENABLE;
1147 hpet_writel(cfg, HPET_T1_CFG);
1149 return; 1148 return;
1149 }
1150 1150
1151 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) 1151 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
1152 hpet_rtc_int_freq = PIE_freq; 1152 hpet_rtc_int_freq = PIE_freq;
@@ -1154,15 +1154,10 @@ static void hpet_rtc_timer_reinit(void)
1154 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; 1154 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
1155 1155
1156 /* It is more accurate to use the comparator value than current count.*/ 1156 /* It is more accurate to use the comparator value than current count.*/
1157 cnt = hpet_readl(HPET_T1_CMP); 1157 cnt = hpet_t1_cmp;
1158 cnt += hpet_tick*HZ/hpet_rtc_int_freq; 1158 cnt += hpet_tick*HZ/hpet_rtc_int_freq;
1159 hpet_writel(cnt, HPET_T1_CMP); 1159 hpet_writel(cnt, HPET_T1_CMP);
1160 1160 hpet_t1_cmp = cnt;
1161 cfg = hpet_readl(HPET_T1_CFG);
1162 cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
1163 hpet_writel(cfg, HPET_T1_CFG);
1164
1165 return;
1166} 1161}
1167 1162
1168/* 1163/*
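
hpet_rtc_timer_reinit() now advances a cached comparator instead of reading HPET_T1_CMP back from the hardware on every interrupt, and no longer rewrites T1_CFG each time. A sketch of the arithmetic; the HPET constants are illustrative:

#include <stdio.h>

static unsigned int hpet_t1_cmp;        /* cached comparator register */

/* One reinit step: push the comparator one RTC period into the
 * future, using only the cached value. */
static void reinit(unsigned long hpet_tick, int hz, int rtc_freq)
{
        hpet_t1_cmp += hpet_tick * hz / rtc_freq;
}

int main(void)
{
        unsigned long hpet_tick = 14318;   /* HPET ticks per kernel tick */

        hpet_t1_cmp = 1000000;
        for (int i = 0; i < 3; i++) {
                reinit(hpet_tick, 1000, 64);   /* HZ=1000, RTC at 64Hz */
                printf("next comparator: %u\n", hpet_t1_cmp);
        }
        return 0;
}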
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index b5e09e6b5536..bf337f493189 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
428{ 428{
429 conditional_sti(regs); 429 conditional_sti(regs);
430 430
431#ifdef CONFIG_CHECKING
432 {
433 unsigned long gs;
434 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
435 rdmsrl(MSR_GS_BASE, gs);
436 if (gs != (unsigned long)pda) {
437 wrmsrl(MSR_GS_BASE, pda);
438 printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
439 regs->rip);
440 }
441 }
442#endif
443
444 if (user_mode(regs)) { 431 if (user_mode(regs)) {
445 struct task_struct *tsk = current; 432 struct task_struct *tsk = current;
446 433
@@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
513{ 500{
514 conditional_sti(regs); 501 conditional_sti(regs);
515 502
516#ifdef CONFIG_CHECKING
517 {
518 unsigned long gs;
519 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
520 rdmsrl(MSR_GS_BASE, gs);
521 if (gs != (unsigned long)pda) {
522 wrmsrl(MSR_GS_BASE, pda);
523 oops_in_progress++;
524 printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
525 oops_in_progress--;
526 }
527 }
528#endif
529
530 if (user_mode(regs)) { 503 if (user_mode(regs)) {
531 struct task_struct *tsk = current; 504 struct task_struct *tsk = current;
532 505
@@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
665 struct task_struct *tsk = current; 638 struct task_struct *tsk = current;
666 siginfo_t info; 639 siginfo_t info;
667 640
668#ifdef CONFIG_CHECKING
669 {
670 /* RED-PEN interaction with debugger - could destroy gs */
671 unsigned long gs;
672 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
673 rdmsrl(MSR_GS_BASE, gs);
674 if (gs != (unsigned long)pda) {
675 wrmsrl(MSR_GS_BASE, pda);
676 printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
677 }
678 }
679#endif
680
681 get_debugreg(condition, 6); 641 get_debugreg(condition, 6);
682 642
683 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 643 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
@@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
888{ 848{
889} 849}
890 850
851asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
852{
853}
854
891/* 855/*
892 * 'math_state_restore()' saves the current math information in the 856 * 'math_state_restore()' saves the current math information in the
893 * old math state array, and gets the new ones from the current task 857 * old math state array, and gets the new ones from the current task
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 6dd642cad2ef..58b19215b4b3 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -50,7 +50,7 @@ SECTIONS
50 *(.bss.page_aligned) 50 *(.bss.page_aligned)
51 *(.bss) 51 *(.bss)
52 } 52 }
53 __bss_end = .; 53 __bss_stop = .;
54 54
55 . = ALIGN(PAGE_SIZE); 55 . = ALIGN(PAGE_SIZE);
56 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 56 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index fd99ddd009bc..4a54221e10bc 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page);
203#endif 203#endif
204 204
205EXPORT_SYMBOL(cpu_khz); 205EXPORT_SYMBOL(cpu_khz);
206
207EXPORT_SYMBOL(load_gs_index);
208
diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c
index a29fb75b33ac..95b6d9639fba 100644
--- a/arch/x86_64/lib/bitops.c
+++ b/arch/x86_64/lib/bitops.c
@@ -5,19 +5,23 @@
5#undef find_first_bit 5#undef find_first_bit
6#undef find_next_bit 6#undef find_next_bit
7 7
8/** 8static inline long
9 * find_first_zero_bit - find the first zero bit in a memory region 9__find_first_zero_bit(const unsigned long * addr, unsigned long size)
10 * @addr: The address to start the search at
11 * @size: The maximum size to search
12 *
13 * Returns the bit-number of the first zero bit, not the number of the byte
14 * containing a bit.
15 */
16inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
17{ 10{
18 long d0, d1, d2; 11 long d0, d1, d2;
19 long res; 12 long res;
20 13
14 /*
15 * We must test the size in words, not in bits, because
16 * otherwise incoming sizes in the range -63..-1 will not run
17 * any scasq instructions, and then the flags used by the je
18 * instruction will have whatever random value was in place
19 * before. Nobody should call us like that, but
20 * find_next_zero_bit() does when offset and size are in the
21 * same word and it fails to find a zero itself.
22 */
23 size += 63;
24 size >>= 6;
21 if (!size) 25 if (!size)
22 return 0; 26 return 0;
23 asm volatile( 27 asm volatile(
@@ -30,12 +34,30 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
30 " shlq $3,%%rdi\n" 34 " shlq $3,%%rdi\n"
31 " addq %%rdi,%%rdx" 35 " addq %%rdi,%%rdx"
32 :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) 36 :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
33 :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), 37 :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
34 [addr] "r" (addr) : "memory"); 38 [addr] "S" (addr) : "memory");
39 /*
40 * Any register would do for [addr] above, but GCC tends to
41 * prefer rbx over rsi, even though rsi is readily available
42 * and doesn't have to be saved.
43 */
35 return res; 44 return res;
36} 45}
37 46
38/** 47/**
48 * find_first_zero_bit - find the first zero bit in a memory region
49 * @addr: The address to start the search at
50 * @size: The maximum size to search
51 *
52 * Returns the bit-number of the first zero bit, not the number of the byte
53 * containing a bit.
54 */
55long find_first_zero_bit(const unsigned long * addr, unsigned long size)
56{
57 return __find_first_zero_bit (addr, size);
58}
59
60/**
39 * find_next_zero_bit - find the first zero bit in a memory region 61 * find_next_zero_bit - find the first zero bit in a memory region
40 * @addr: The address to base the search on 62 * @addr: The address to base the search on
41 * @offset: The bitnumber to start searching at 63 * @offset: The bitnumber to start searching at
@@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
43 */ 65 */
44long find_next_zero_bit (const unsigned long * addr, long size, long offset) 66long find_next_zero_bit (const unsigned long * addr, long size, long offset)
45{ 67{
46 unsigned long * p = ((unsigned long *) addr) + (offset >> 6); 68 const unsigned long * p = addr + (offset >> 6);
47 unsigned long set = 0; 69 unsigned long set = 0;
48 unsigned long res, bit = offset&63; 70 unsigned long res, bit = offset&63;
49 71
@@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset)
63 /* 85 /*
64 * No zero yet, search remaining full words for a zero 86 * No zero yet, search remaining full words for a zero
65 */ 87 */
66 res = find_first_zero_bit ((const unsigned long *)p, 88 res = __find_first_zero_bit (p, size - 64 * (p - addr));
67 size - 64 * (p - (unsigned long *) addr)); 89
68 return (offset + set + res); 90 return (offset + set + res);
69} 91}
70 92
@@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
74 long d0, d1; 96 long d0, d1;
75 long res; 97 long res;
76 98
99 /*
100 * We must test the size in words, not in bits, because
101 * otherwise incoming sizes in the range -63..-1 will not run
102 * any scasq instructions, and then the flags used by the jz
103 * instruction will have whatever random value was in place
104 * before. Nobody should call us like that, but
105 * find_next_bit() does when offset and size are in the same
106 * word and it fails to find a one itself.
107 */
108 size += 63;
109 size >>= 6;
110 if (!size)
111 return 0;
77 asm volatile( 112 asm volatile(
78 " repe; scasq\n" 113 " repe; scasq\n"
79 " jz 1f\n" 114 " jz 1f\n"
@@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
83 " shlq $3,%%rdi\n" 118 " shlq $3,%%rdi\n"
84 " addq %%rdi,%%rax" 119 " addq %%rdi,%%rax"
85 :"=a" (res), "=&c" (d0), "=&D" (d1) 120 :"=a" (res), "=&c" (d0), "=&D" (d1)
86 :"0" (0ULL), 121 :"0" (0ULL), "1" (size), "2" (addr),
87 "1" ((size + 63) >> 6), "2" (addr),
88 [addr] "r" (addr) : "memory"); 122 [addr] "r" (addr) : "memory");
89 return res; 123 return res;
90} 124}
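
The word-count conversion is hoisted ahead of the zero test precisely so that "negative" bit counts, as passed by find_next_zero_bit()/find_next_bit() in the offset==size case, collapse to zero words instead of becoming a huge scasq count. A portable model of the guard:

#include <stdio.h>

/* Convert a (possibly "negative") bit count to the number of 64-bit
 * words to scan, mirroring the size += 63; size >>= 6; fix above. */
static long words_to_scan(long size_in_bits)
{
        unsigned long size = (unsigned long)size_in_bits;

        size += 63;     /* wraps for -63..-1, yielding 0..62 */
        size >>= 6;
        return (long)size;
}

int main(void)
{
        printf("%ld\n", words_to_scan(128));   /* 2 words */
        printf("%ld\n", words_to_scan(1));     /* 1 word  */
        printf("%ld\n", words_to_scan(-7));    /* 0 words: the fixed case */
        return 0;
}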
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 30a9da458c15..43d9fa136180 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -5,46 +5,8 @@
5 .globl clear_page 5 .globl clear_page
6 .p2align 4 6 .p2align 4
7clear_page: 7clear_page:
8 xorl %eax,%eax
9 movl $4096/64,%ecx
10 .p2align 4
11.Lloop:
12 decl %ecx
13#define PUT(x) movq %rax,x*8(%rdi)
14 movq %rax,(%rdi)
15 PUT(1)
16 PUT(2)
17 PUT(3)
18 PUT(4)
19 PUT(5)
20 PUT(6)
21 PUT(7)
22 leaq 64(%rdi),%rdi
23 jnz .Lloop
24 nop
25 ret
26clear_page_end:
27
28 /* C stepping K8 run faster using the string instructions.
29 It is also a lot simpler. Use this when possible */
30
31#include <asm/cpufeature.h>
32
33 .section .altinstructions,"a"
34 .align 8
35 .quad clear_page
36 .quad clear_page_c
37 .byte X86_FEATURE_K8_C
38 .byte clear_page_end-clear_page
39 .byte clear_page_c_end-clear_page_c
40 .previous
41
42 .section .altinstr_replacement,"ax"
43clear_page_c:
44 movl $4096/8,%ecx 8 movl $4096/8,%ecx
45 xorl %eax,%eax 9 xorl %eax,%eax
46 rep 10 rep
47 stosq 11 stosq
48 ret 12 ret
49clear_page_c_end:
50 .previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index dd3aa47b6bf5..621a19769406 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -8,94 +8,7 @@
8 .globl copy_page 8 .globl copy_page
9 .p2align 4 9 .p2align 4
10copy_page: 10copy_page:
11 subq $3*8,%rsp
12 movq %rbx,(%rsp)
13 movq %r12,1*8(%rsp)
14 movq %r13,2*8(%rsp)
15
16 movl $(4096/64)-5,%ecx
17 .p2align 4
18.Loop64:
19 dec %rcx
20
21 movq (%rsi), %rax
22 movq 8 (%rsi), %rbx
23 movq 16 (%rsi), %rdx
24 movq 24 (%rsi), %r8
25 movq 32 (%rsi), %r9
26 movq 40 (%rsi), %r10
27 movq 48 (%rsi), %r11
28 movq 56 (%rsi), %r12
29
30 prefetcht0 5*64(%rsi)
31
32 movq %rax, (%rdi)
33 movq %rbx, 8 (%rdi)
34 movq %rdx, 16 (%rdi)
35 movq %r8, 24 (%rdi)
36 movq %r9, 32 (%rdi)
37 movq %r10, 40 (%rdi)
38 movq %r11, 48 (%rdi)
39 movq %r12, 56 (%rdi)
40
41 leaq 64 (%rsi), %rsi
42 leaq 64 (%rdi), %rdi
43
44 jnz .Loop64
45
46 movl $5,%ecx
47 .p2align 4
48.Loop2:
49 decl %ecx
50
51 movq (%rsi), %rax
52 movq 8 (%rsi), %rbx
53 movq 16 (%rsi), %rdx
54 movq 24 (%rsi), %r8
55 movq 32 (%rsi), %r9
56 movq 40 (%rsi), %r10
57 movq 48 (%rsi), %r11
58 movq 56 (%rsi), %r12
59
60 movq %rax, (%rdi)
61 movq %rbx, 8 (%rdi)
62 movq %rdx, 16 (%rdi)
63 movq %r8, 24 (%rdi)
64 movq %r9, 32 (%rdi)
65 movq %r10, 40 (%rdi)
66 movq %r11, 48 (%rdi)
67 movq %r12, 56 (%rdi)
68
69 leaq 64(%rdi),%rdi
70 leaq 64(%rsi),%rsi
71
72 jnz .Loop2
73
74 movq (%rsp),%rbx
75 movq 1*8(%rsp),%r12
76 movq 2*8(%rsp),%r13
77 addq $3*8,%rsp
78 ret
79
80 /* C stepping K8 run faster using the string copy instructions.
81 It is also a lot simpler. Use this when possible */
82
83#include <asm/cpufeature.h>
84
85 .section .altinstructions,"a"
86 .align 8
87 .quad copy_page
88 .quad copy_page_c
89 .byte X86_FEATURE_K8_C
90 .byte copy_page_c_end-copy_page_c
91 .byte copy_page_c_end-copy_page_c
92 .previous
93
94 .section .altinstr_replacement,"ax"
95copy_page_c:
96 movl $4096/8,%ecx 11 movl $4096/8,%ecx
97 rep 12 rep
98 movsq 13 movsq
99 ret 14 ret
100copy_page_c_end:
101 .previous
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index c6c46494fef5..92dd80544602 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -11,6 +11,8 @@
11 * 11 *
12 * Output: 12 * Output:
13 * rax original destination 13 * rax original destination
14 *
15 * TODO: check best memcpy for PSC
14 */ 16 */
15 17
16 .globl __memcpy 18 .globl __memcpy
@@ -18,95 +20,6 @@
18 .p2align 4 20 .p2align 4
19__memcpy: 21__memcpy:
20memcpy: 22memcpy:
21 pushq %rbx
22 movq %rdi,%rax
23
24 movl %edx,%ecx
25 shrl $6,%ecx
26 jz .Lhandle_tail
27
28 .p2align 4
29.Lloop_64:
30 decl %ecx
31
32 movq (%rsi),%r11
33 movq 8(%rsi),%r8
34
35 movq %r11,(%rdi)
36 movq %r8,1*8(%rdi)
37
38 movq 2*8(%rsi),%r9
39 movq 3*8(%rsi),%r10
40
41 movq %r9,2*8(%rdi)
42 movq %r10,3*8(%rdi)
43
44 movq 4*8(%rsi),%r11
45 movq 5*8(%rsi),%r8
46
47 movq %r11,4*8(%rdi)
48 movq %r8,5*8(%rdi)
49
50 movq 6*8(%rsi),%r9
51 movq 7*8(%rsi),%r10
52
53 movq %r9,6*8(%rdi)
54 movq %r10,7*8(%rdi)
55
56 leaq 64(%rsi),%rsi
57 leaq 64(%rdi),%rdi
58 jnz .Lloop_64
59
60.Lhandle_tail:
61 movl %edx,%ecx
62 andl $63,%ecx
63 shrl $3,%ecx
64 jz .Lhandle_7
65 .p2align 4
66.Lloop_8:
67 decl %ecx
68 movq (%rsi),%r8
69 movq %r8,(%rdi)
70 leaq 8(%rdi),%rdi
71 leaq 8(%rsi),%rsi
72 jnz .Lloop_8
73
74.Lhandle_7:
75 movl %edx,%ecx
76 andl $7,%ecx
77 jz .Lende
78 .p2align 4
79.Lloop_1:
80 movb (%rsi),%r8b
81 movb %r8b,(%rdi)
82 incq %rdi
83 incq %rsi
84 decl %ecx
85 jnz .Lloop_1
86
87.Lende:
88 popq %rbx
89 ret
90.Lfinal:
91
92 /* C stepping K8 run faster using the string copy instructions.
93 It is also a lot simpler. Use this when possible */
94
95 .section .altinstructions,"a"
96 .align 8
97 .quad memcpy
98 .quad memcpy_c
99 .byte X86_FEATURE_K8_C
100 .byte .Lfinal-memcpy
101 .byte memcpy_c_end-memcpy_c
102 .previous
103
104 .section .altinstr_replacement,"ax"
105 /* rdi destination
106 * rsi source
107 * rdx count
108 */
109memcpy_c:
110 movq %rdi,%rax 23 movq %rdi,%rax
111 movl %edx,%ecx 24 movl %edx,%ecx
112 shrl $3,%ecx 25 shrl $3,%ecx
@@ -117,5 +30,3 @@ memcpy_c:
117 rep 30 rep
118 movsb 31 movsb
119 ret 32 ret
120memcpy_c_end:
121 .previous
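
All four library routines in this patch drop their unrolled loops for the rep-prefixed string instructions. A user-space sketch of the same strategy for memcpy, written with x86-64 GCC/Clang inline asm; this is an illustration of the technique, not the kernel's symbol:

#include <stdio.h>
#include <stddef.h>

/* Copy qwords with rep movsq, then the 0-7 byte tail with rep movsb.
 * rdi/rsi/rcx are updated by the CPU, hence the "+" constraints. */
static void *memcpy_string(void *dst, const void *src, size_t len)
{
        void *ret = dst;
        size_t qwords = len >> 3;
        size_t tail = len & 7;

        asm volatile("rep movsq"
                     : "+D" (dst), "+S" (src), "+c" (qwords)
                     :: "memory");
        asm volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (tail)
                     :: "memory");
        return ret;
}

int main(void)
{
        char src[] = "string-op memcpy";
        char dst[sizeof(src)];

        memcpy_string(dst, src, sizeof(src));
        puts(dst);
        return 0;
}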
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 4b4c40638640..2aa48f24ed1e 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -13,98 +13,6 @@
13 .p2align 4 13 .p2align 4
14memset: 14memset:
15__memset: 15__memset:
16 movq %rdi,%r10
17 movq %rdx,%r11
18
19 /* expand byte value */
20 movzbl %sil,%ecx
21 movabs $0x0101010101010101,%rax
22 mul %rcx /* with rax, clobbers rdx */
23
24 /* align dst */
25 movl %edi,%r9d
26 andl $7,%r9d
27 jnz .Lbad_alignment
28.Lafter_bad_alignment:
29
30 movl %r11d,%ecx
31 shrl $6,%ecx
32 jz .Lhandle_tail
33
34 .p2align 4
35.Lloop_64:
36 decl %ecx
37 movq %rax,(%rdi)
38 movq %rax,8(%rdi)
39 movq %rax,16(%rdi)
40 movq %rax,24(%rdi)
41 movq %rax,32(%rdi)
42 movq %rax,40(%rdi)
43 movq %rax,48(%rdi)
44 movq %rax,56(%rdi)
45 leaq 64(%rdi),%rdi
46 jnz .Lloop_64
47
48 /* Handle tail in loops. The loops should be faster than hard
49 to predict jump tables. */
50 .p2align 4
51.Lhandle_tail:
52 movl %r11d,%ecx
53 andl $63&(~7),%ecx
54 jz .Lhandle_7
55 shrl $3,%ecx
56 .p2align 4
57.Lloop_8:
58 decl %ecx
59 movq %rax,(%rdi)
60 leaq 8(%rdi),%rdi
61 jnz .Lloop_8
62
63.Lhandle_7:
64 movl %r11d,%ecx
65 andl $7,%ecx
66 jz .Lende
67 .p2align 4
68.Lloop_1:
69 decl %ecx
70 movb %al,(%rdi)
71 leaq 1(%rdi),%rdi
72 jnz .Lloop_1
73
74.Lende:
75 movq %r10,%rax
76 ret
77
78.Lbad_alignment:
79 cmpq $7,%r11
80 jbe .Lhandle_7
81 movq %rax,(%rdi) /* unaligned store */
82 movq $8,%r8
83 subq %r9,%r8
84 addq %r8,%rdi
85 subq %r8,%r11
86 jmp .Lafter_bad_alignment
87
88 /* C stepping K8 run faster using the string instructions.
89 It is also a lot simpler. Use this when possible */
90
91#include <asm/cpufeature.h>
92
93 .section .altinstructions,"a"
94 .align 8
95 .quad memset
96 .quad memset_c
97 .byte X86_FEATURE_K8_C
98 .byte memset_c_end-memset_c
99 .byte memset_c_end-memset_c
100 .previous
101
102 .section .altinstr_replacement,"ax"
103 /* rdi destination
104 * rsi value
105 * rdx count
106 */
107memset_c:
108 movq %rdi,%r9 16 movq %rdi,%r9
109 movl %edx,%r8d 17 movl %edx,%r8d
110 andl $7,%r8d 18 andl $7,%r8d
@@ -121,5 +29,3 @@ memset_c:
121 stosb 29 stosb
122 movq %r9,%rax 30 movq %r9,%rax
123 ret 31 ret
124memset_c_end:
125 .previous
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index b75b872ec154..3a63707a698b 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -308,18 +308,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
308 unsigned long flags; 308 unsigned long flags;
309 siginfo_t info; 309 siginfo_t info;
310 310
311#ifdef CONFIG_CHECKING
312 {
313 unsigned long gs;
314 struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
315 rdmsrl(MSR_GS_BASE, gs);
316 if (gs != (unsigned long)pda) {
317 wrmsrl(MSR_GS_BASE, pda);
318 printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
319 }
320 }
321#endif
322
323 /* get the address */ 311 /* get the address */
324 __asm__("movq %%cr2,%0":"=r" (address)); 312 __asm__("movq %%cr2,%0":"=r" (address));
325 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 313 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
@@ -571,3 +559,10 @@ do_sigbus:
571 force_sig_info(SIGBUS, &info, tsk); 559 force_sig_info(SIGBUS, &info, tsk);
572 return; 560 return;
573} 561}
562
563static int __init enable_pagefaulttrace(char *str)
564{
565 page_fault_trace = 1;
566 return 0;
567}
568__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index e60a1a848de8..286f6a624c3a 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,6 +22,7 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/pci.h>
25 26
26#include <asm/processor.h> 27#include <asm/processor.h>
27#include <asm/system.h> 28#include <asm/system.h>
@@ -36,16 +37,13 @@
36#include <asm/mmu_context.h> 37#include <asm/mmu_context.h>
37#include <asm/proto.h> 38#include <asm/proto.h>
38#include <asm/smp.h> 39#include <asm/smp.h>
40#include <asm/sections.h>
39 41
40#ifndef Dprintk 42#ifndef Dprintk
41#define Dprintk(x...) 43#define Dprintk(x...)
42#endif 44#endif
43 45
44#ifdef CONFIG_GART_IOMMU 46static unsigned long dma_reserve __initdata;
45extern int swiotlb;
46#endif
47
48extern char _stext[];
49 47
50DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 48DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
51 49
@@ -86,9 +84,6 @@ void show_mem(void)
86 84
87/* References to section boundaries */ 85/* References to section boundaries */
88 86
89extern char _text, _etext, _edata, __bss_start, _end[];
90extern char __init_begin, __init_end;
91
92int after_bootmem; 87int after_bootmem;
93 88
94static void *spp_getpage(void) 89static void *spp_getpage(void)
@@ -308,42 +303,81 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
308 table_end<<PAGE_SHIFT); 303 table_end<<PAGE_SHIFT);
309} 304}
310 305
311extern struct x8664_pda cpu_pda[NR_CPUS]; 306void __cpuinit zap_low_mappings(int cpu)
307{
308 if (cpu == 0) {
309 pgd_t *pgd = pgd_offset_k(0UL);
310 pgd_clear(pgd);
311 } else {
312 /*
313 * For AP's, zap the low identity mappings by changing the cr3
314 * to init_level4_pgt and doing local flush tlb all
315 */
316 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
317 }
318 __flush_tlb_all();
319}
312 320
313/* Assumes all CPUs still execute in init_mm */ 321/* Compute zone sizes for the DMA and DMA32 zones in a node. */
314void zap_low_mappings(void) 322__init void
323size_zones(unsigned long *z, unsigned long *h,
324 unsigned long start_pfn, unsigned long end_pfn)
315{ 325{
316 pgd_t *pgd = pgd_offset_k(0UL); 326 int i;
317 pgd_clear(pgd); 327 unsigned long w;
318 flush_tlb_all(); 328
329 for (i = 0; i < MAX_NR_ZONES; i++)
330 z[i] = 0;
331
332 if (start_pfn < MAX_DMA_PFN)
333 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
334 if (start_pfn < MAX_DMA32_PFN) {
335 unsigned long dma32_pfn = MAX_DMA32_PFN;
336 if (dma32_pfn > end_pfn)
337 dma32_pfn = end_pfn;
338 z[ZONE_DMA32] = dma32_pfn - start_pfn;
339 }
340 z[ZONE_NORMAL] = end_pfn - start_pfn;
341
342 /* Remove lower zones from higher ones. */
343 w = 0;
344 for (i = 0; i < MAX_NR_ZONES; i++) {
345 if (z[i])
346 z[i] -= w;
347 w += z[i];
348 }
349
350 /* Compute holes */
351 w = 0;
352 for (i = 0; i < MAX_NR_ZONES; i++) {
353 unsigned long s = w;
354 w += z[i];
355 h[i] = e820_hole_size(s, w);
356 }
357
358 /* Add the space needed for mem_map to the holes too. */
359 for (i = 0; i < MAX_NR_ZONES; i++)
360 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
361
362 /* The 16MB DMA zone has the kernel and other misc mappings.
363 Account them too */
364 if (h[ZONE_DMA]) {
365 h[ZONE_DMA] += dma_reserve;
366 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
367 printk(KERN_WARNING
368 "Kernel too large and filling up ZONE_DMA?\n");
369 h[ZONE_DMA] = z[ZONE_DMA];
370 }
371 }
319} 372}
320 373
321#ifndef CONFIG_NUMA 374#ifndef CONFIG_NUMA
322void __init paging_init(void) 375void __init paging_init(void)
323{ 376{
324 { 377 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
325 unsigned long zones_size[MAX_NR_ZONES]; 378 size_zones(zones, holes, 0, end_pfn);
326 unsigned long holes[MAX_NR_ZONES]; 379 free_area_init_node(0, NODE_DATA(0), zones,
327 unsigned int max_dma; 380 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
328
329 memset(zones_size, 0, sizeof(zones_size));
330 memset(holes, 0, sizeof(holes));
331
332 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
333
334 if (end_pfn < max_dma) {
335 zones_size[ZONE_DMA] = end_pfn;
336 holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
337 } else {
338 zones_size[ZONE_DMA] = max_dma;
339 holes[ZONE_DMA] = e820_hole_size(0, max_dma);
340 zones_size[ZONE_NORMAL] = end_pfn - max_dma;
341 holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
342 }
343 free_area_init_node(0, NODE_DATA(0), zones_size,
344 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
345 }
346 return;
347} 381}
348#endif 382#endif
349 383
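
The subtraction pass in size_zones() turns the per-zone end marks into disjoint sizes. A worked run for an assumed 4GB machine, using the usual 16MB and 4GB zone cut-offs:

#include <stdio.h>

enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

int main(void)
{
        unsigned long z[MAX_NR_ZONES];
        unsigned long end_pfn = 1048576;   /* 4GB in 4K pages */
        unsigned long w = 0;

        z[ZONE_DMA]    = 4096;             /* MAX_DMA_PFN:   16MB */
        z[ZONE_DMA32]  = 1048576;          /* MAX_DMA32_PFN: 4GB  */
        z[ZONE_NORMAL] = end_pfn;

        /* Remove lower zones from higher ones, as in size_zones(). */
        for (int i = 0; i < MAX_NR_ZONES; i++) {
                if (z[i])
                        z[i] -= w;
                w += z[i];
        }

        printf("DMA %lu, DMA32 %lu, NORMAL %lu pages\n",
               z[ZONE_DMA], z[ZONE_DMA32], z[ZONE_NORMAL]);
        return 0;
}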
@@ -438,19 +472,16 @@ void __init mem_init(void)
438 datasize >> 10, 472 datasize >> 10,
439 initsize >> 10); 473 initsize >> 10);
440 474
475#ifdef CONFIG_SMP
441 /* 476 /*
442 * Subtle. SMP is doing its boot stuff late (because it has to 477 * Sync boot_level4_pgt mappings with the init_level4_pgt
443 * fork idle threads) - but it also needs low mappings for the 478 * except for the low identity mappings which are already zapped
444 * protected-mode entry to work. We zap these entries only after 479 * in init_level4_pgt. This sync-up is essential for AP's bringup
445 * the WP-bit has been tested.
446 */ 480 */
447#ifndef CONFIG_SMP 481 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
448 zap_low_mappings();
449#endif 482#endif
450} 483}
451 484
452extern char __initdata_begin[], __initdata_end[];
453
454void free_initmem(void) 485void free_initmem(void)
455{ 486{
456 unsigned long addr; 487 unsigned long addr;
@@ -464,7 +495,7 @@ void free_initmem(void)
464 totalram_pages++; 495 totalram_pages++;
465 } 496 }
466 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); 497 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
467 printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); 498 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
468} 499}
469 500
470#ifdef CONFIG_BLK_DEV_INITRD 501#ifdef CONFIG_BLK_DEV_INITRD
@@ -491,6 +522,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
491#else 522#else
492 reserve_bootmem(phys, len); 523 reserve_bootmem(phys, len);
493#endif 524#endif
525 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
526 dma_reserve += len / PAGE_SIZE;
494} 527}
495 528
496int kern_addr_valid(unsigned long addr) 529int kern_addr_valid(unsigned long addr)
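A hedged usage example of the new accounting: only reservations that fall entirely below the 16 MiB DMA boundary (MAX_DMA_PFN * PAGE_SIZE) are added to dma_reserve, which size_zones() then folds into the ZONE_DMA holes. The addresses below are made up:

	/* Illustration only: */
	reserve_bootmem_generic(0x9f000, 0x10000);
	/* 0x9f000 + 0x10000 = 0xaf000 <= 16 MiB, so dma_reserve
	 * grows by 0x10000 / 4096 = 16 pages. */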
@@ -532,10 +565,6 @@ extern int exception_trace, page_fault_trace;
532static ctl_table debug_table2[] = { 565static ctl_table debug_table2[] = {
533 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, 566 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
534 proc_dointvec }, 567 proc_dointvec },
535#ifdef CONFIG_CHECKING
536 { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
537 proc_dointvec },
538#endif
539 { 0, } 568 { 0, }
540}; 569};
541 570
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 6972df480d2b..ecf7acb5db9b 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
60 if (address >= end) 60 if (address >= end)
61 BUG(); 61 BUG();
62 do { 62 do {
63 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 63 pte_t * pte = pte_alloc_kernel(pmd, address);
64 if (!pte) 64 if (!pte)
65 return -ENOMEM; 65 return -ENOMEM;
66 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 66 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
105 flush_cache_all(); 105 flush_cache_all();
106 if (address >= end) 106 if (address >= end)
107 BUG(); 107 BUG();
108 spin_lock(&init_mm.page_table_lock);
109 do { 108 do {
110 pud_t *pud; 109 pud_t *pud;
111 pud = pud_alloc(&init_mm, pgd, address); 110 pud = pud_alloc(&init_mm, pgd, address);
@@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
119 address = (address + PGDIR_SIZE) & PGDIR_MASK; 118 address = (address + PGDIR_SIZE) & PGDIR_MASK;
120 pgd++; 119 pgd++;
121 } while (address && (address < end)); 120 } while (address && (address < end));
122 spin_unlock(&init_mm.page_table_lock);
123 flush_tlb_all(); 121 flush_tlb_all();
124 return error; 122 return error;
125} 123}
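Both ioremap changes follow the same rule: pte_alloc_kernel() dropped its mm argument and now takes init_mm.page_table_lock itself, so callers no longer hold the lock across page-table allocation. A hedged sketch of the new contract (the real code lives in mm/memory.c):

	pte_t *pte_alloc_kernel(pmd_t *pmd, unsigned long address)
	{
		if (pmd_none(*pmd)) {
			pte_t *new = pte_alloc_one_kernel(&init_mm, address);
			if (!new)
				return NULL;
			spin_lock(&init_mm.page_table_lock);
			if (pmd_present(*pmd))	/* raced: someone populated it */
				pte_free_kernel(new);
			else
				pmd_populate_kernel(&init_mm, pmd, new);
			spin_unlock(&init_mm.page_table_lock);
		}
		return pte_offset_kernel(pmd, address);
	}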
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index 65417b040c1b..a5663e0bb01c 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
108 limit >>= 16; 108 limit >>= 16;
109 limit <<= 24; 109 limit <<= 24;
110 limit |= (1<<24)-1; 110 limit |= (1<<24)-1;
111 limit++;
111 112
112 if (limit > end_pfn << PAGE_SHIFT) 113 if (limit > end_pfn << PAGE_SHIFT)
113 limit = end_pfn << PAGE_SHIFT; 114 limit = end_pfn << PAGE_SHIFT;
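A hedged worked example of the decode above, with a made-up K8 limit register value of 0x00010000:

	/*  0x00010000 >> 16         = 0x0001      (address bits 39:24)
	 *  0x0001 << 24             = 0x01000000
	 *  0x01000000 | ((1<<24)-1) = 0x01ffffff  (inclusive last byte)
	 *  0x01ffffff + 1           = 0x02000000  (exclusive end, now
	 *      directly comparable with end_pfn << PAGE_SHIFT)
	 */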
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 80a49d9bd8a7..a828a01739cc 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
38 38
39int numa_off __initdata; 39int numa_off __initdata;
40 40
41int __init compute_hash_shift(struct node *nodes, int numnodes) 41
42/*
43 * Given a shift value, try to populate memnodemap[]
44 * Returns:
45 * 1 if OK
46 * 0 if memnodemap[] too small (or shift too small)
47 * -1 if node overlap or lost RAM (shift too big)
48 */
49static int __init populate_memnodemap(
50 const struct node *nodes, int numnodes, int shift)
42{ 51{
43 int i; 52 int i;
44 int shift = 20; 53 int res = -1;
45 unsigned long addr,maxend=0; 54 unsigned long addr, end;
46
47 for (i = 0; i < numnodes; i++)
48 if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
49 maxend = nodes[i].end;
50 55
51 while ((1UL << shift) < (maxend / NODEMAPSIZE)) 56 memset(memnodemap, 0xff, sizeof(memnodemap));
52 shift++;
53
54 printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
55 shift,maxend);
56 memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
57 for (i = 0; i < numnodes; i++) { 57 for (i = 0; i < numnodes; i++) {
58 if (nodes[i].start == nodes[i].end) 58 addr = nodes[i].start;
59 end = nodes[i].end;
60 if (addr >= end)
59 continue; 61 continue;
60 for (addr = nodes[i].start; 62 if ((end >> shift) >= NODEMAPSIZE)
61 addr < nodes[i].end; 63 return 0;
62 addr += (1UL << shift)) { 64 do {
63 if (memnodemap[addr >> shift] != 0xff) { 65 if (memnodemap[addr >> shift] != 0xff)
64 printk(KERN_INFO
65 "Your memory is not aligned you need to rebuild your kernel "
66 "with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
67 shift,addr);
68 return -1; 66 return -1;
69 }
70 memnodemap[addr >> shift] = i; 67 memnodemap[addr >> shift] = i;
71 } 68 addr += (1UL << shift);
69 } while (addr < end);
70 res = 1;
72 } 71 }
72 return res;
73}
74
75int __init compute_hash_shift(struct node *nodes, int numnodes)
76{
77 int shift = 20;
78
79 while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
80 shift++;
81
82 printk(KERN_DEBUG "Using %d for the hash shift.\n",
83 shift);
84
85 if (populate_memnodemap(nodes, numnodes, shift) != 1) {
86 printk(KERN_INFO
87 "Your memory is not aligned you need to rebuild your kernel "
88 "with a bigger NODEMAPSIZE shift=%d\n",
89 shift);
90 return -1;
91 }
73 return shift; 92 return shift;
74} 93}
75 94
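The shift chosen here feeds the constant-time physical-address-to-node lookup, roughly as in include/asm-x86_64/mmzone.h (hedged restatement):

	extern int memnode_shift;
	extern u8  memnodemap[NODEMAPSIZE];
	#define phys_to_nid(addr) (memnodemap[(addr) >> memnode_shift])
	/* populate_memnodemap()'s NODEMAPSIZE check above guarantees
	 * this index never runs off the end of the table. */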
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
94 start_pfn = start >> PAGE_SHIFT; 113 start_pfn = start >> PAGE_SHIFT;
95 end_pfn = end >> PAGE_SHIFT; 114 end_pfn = end >> PAGE_SHIFT;
96 115
97 memory_present(nodeid, start_pfn, end_pfn);
98 nodedata_phys = find_e820_area(start, end, pgdat_size); 116 nodedata_phys = find_e820_area(start, end, pgdat_size);
99 if (nodedata_phys == -1L) 117 if (nodedata_phys == -1L)
100 panic("Cannot find memory pgdat in node %d\n", nodeid); 118 panic("Cannot find memory pgdat in node %d\n", nodeid);
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid)
132 unsigned long start_pfn, end_pfn; 150 unsigned long start_pfn, end_pfn;
133 unsigned long zones[MAX_NR_ZONES]; 151 unsigned long zones[MAX_NR_ZONES];
134 unsigned long holes[MAX_NR_ZONES]; 152 unsigned long holes[MAX_NR_ZONES];
135 unsigned long dma_end_pfn;
136 153
137 memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 154 start_pfn = node_start_pfn(nodeid);
138 memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); 155 end_pfn = node_end_pfn(nodeid);
139 156
140 start_pfn = node_start_pfn(nodeid); 157 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
141 end_pfn = node_end_pfn(nodeid); 158 nodeid, start_pfn, end_pfn);
142 159
143 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); 160 size_zones(zones, holes, start_pfn, end_pfn);
144
145 /* All nodes > 0 have a zero length zone DMA */
146 dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
147 if (start_pfn < dma_end_pfn) {
148 zones[ZONE_DMA] = dma_end_pfn - start_pfn;
149 holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
150 zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
151 holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
152
153 } else {
154 zones[ZONE_NORMAL] = end_pfn - start_pfn;
155 holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
156 }
157
158 free_area_init_node(nodeid, NODE_DATA(nodeid), zones, 161 free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
159 start_pfn, holes); 162 start_pfn, holes);
160} 163}
@@ -167,18 +170,16 @@ void __init numa_init_array(void)
167 mapping. To avoid this fill in the mapping for all possible 170 mapping. To avoid this fill in the mapping for all possible
168 CPUs, as the number of CPUs is not known yet. 171 CPUs, as the number of CPUs is not known yet.
169 We round robin the existing nodes. */ 172 We round robin the existing nodes. */
170 rr = 0; 173 rr = first_node(node_online_map);
171 for (i = 0; i < NR_CPUS; i++) { 174 for (i = 0; i < NR_CPUS; i++) {
172 if (cpu_to_node[i] != NUMA_NO_NODE) 175 if (cpu_to_node[i] != NUMA_NO_NODE)
173 continue; 176 continue;
177 numa_set_node(i, rr);
174 rr = next_node(rr, node_online_map); 178 rr = next_node(rr, node_online_map);
175 if (rr == MAX_NUMNODES) 179 if (rr == MAX_NUMNODES)
176 rr = first_node(node_online_map); 180 rr = first_node(node_online_map);
177 cpu_to_node[i] = rr;
178 rr++;
179 } 181 }
180 182
181 set_bit(0, &node_to_cpumask[cpu_to_node(0)]);
182} 183}
183 184
184#ifdef CONFIG_NUMA_EMU 185#ifdef CONFIG_NUMA_EMU
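A hedged worked example of the fixed round robin: with node_online_map = {0, 2} and four CPUs still at NUMA_NO_NODE, the loop now assigns a node before advancing rr, so CPU 0 gets the first online node instead of skipping it:

	/*  cpu 0 -> node 0   (rr starts at first_node(node_online_map))
	 *  cpu 1 -> node 2   (rr = next_node(0, ...) = 2)
	 *  cpu 2 -> node 0   (next_node(2, ...) hit MAX_NUMNODES; wrapped)
	 *  cpu 3 -> node 2
	 */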
@@ -207,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
207 if (i == numa_fake-1) 208 if (i == numa_fake-1)
208 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; 209 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
209 nodes[i].end = nodes[i].start + sz; 210 nodes[i].end = nodes[i].start + sz;
210 if (i != numa_fake-1)
211 nodes[i].end--;
212 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 211 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
213 i, 212 i,
214 nodes[i].start, nodes[i].end, 213 nodes[i].start, nodes[i].end,
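Dropping the end-- makes emulated node ranges half-open [start, end), matching the k8topology and SRAT changes elsewhere in this series. A hedged example with numa_fake=2 over 8 GiB:

	/*  node 0: 0x000000000 - 0x100000000   (ends now exclusive)
	 *  node 1: 0x100000000 - 0x200000000
	 *  Previously node 0 ended at 0x0ffffffff, leaving the end
	 *  convention inconsistent between fake nodes. */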
@@ -259,18 +258,22 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
259 nodes_clear(node_online_map); 258 nodes_clear(node_online_map);
260 node_set_online(0); 259 node_set_online(0);
261 for (i = 0; i < NR_CPUS; i++) 260 for (i = 0; i < NR_CPUS; i++)
262 cpu_to_node[i] = 0; 261 numa_set_node(i, 0);
263 node_to_cpumask[0] = cpumask_of_cpu(0); 262 node_to_cpumask[0] = cpumask_of_cpu(0);
264 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 263 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
265} 264}
266 265
267__cpuinit void numa_add_cpu(int cpu) 266__cpuinit void numa_add_cpu(int cpu)
268{ 267{
269 /* BP is initialized elsewhere */ 268 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
270 if (cpu)
271 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
272} 269}
273 270
271void __cpuinit numa_set_node(int cpu, int node)
272{
273 cpu_pda[cpu].nodenumber = node;
274 cpu_to_node[cpu] = node;
275}
276
274unsigned long __init numa_free_all_bootmem(void) 277unsigned long __init numa_free_all_bootmem(void)
275{ 278{
276 int i; 279 int i;
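numa_set_node() writes the node into the per-CPU PDA as well as cpu_to_node[]; the payoff (hedged sketch, assuming the nodenumber PDA field added by this series) is a %gs-relative fast path for looking up the current node:

	/* Hedged sketch of the fast-path reader: */
	static inline int numa_node_id_sketch(void)
	{
		return read_pda(nodenumber);   /* no cpu_to_node[] deref */
	}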
@@ -281,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void)
281 return pages; 284 return pages;
282} 285}
283 286
287#ifdef CONFIG_SPARSEMEM
288static void __init arch_sparse_init(void)
289{
290 int i;
291
292 for_each_online_node(i)
293 memory_present(i, node_start_pfn(i), node_end_pfn(i));
294
295 sparse_init();
296}
297#else
298#define arch_sparse_init() do {} while (0)
299#endif
300
284void __init paging_init(void) 301void __init paging_init(void)
285{ 302{
286 int i; 303 int i;
304
305 arch_sparse_init();
306
287 for_each_online_node(i) { 307 for_each_online_node(i) {
288 setup_node_zones(i); 308 setup_node_zones(i);
289 } 309 }
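arch_sparse_init() takes over the memory_present() registration dropped from setup_node_bootmem() above, so every online node is registered before the single sparse_init() pass. A hedged sketch of the resulting boot order under CONFIG_SPARSEMEM:

	/*  paging_init()
	 *    arch_sparse_init()
	 *      memory_present(nid, start, end)   for each online node
	 *      sparse_init()                     builds mem_map sections
	 *    setup_node_zones(nid)               for each online node
	 */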
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 94862e1ec032..b90e8fe9eeb0 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -220,8 +220,6 @@ void global_flush_tlb(void)
220 down_read(&init_mm.mmap_sem); 220 down_read(&init_mm.mmap_sem);
221 df = xchg(&df_list, NULL); 221 df = xchg(&df_list, NULL);
222 up_read(&init_mm.mmap_sem); 222 up_read(&init_mm.mmap_sem);
223 if (!df)
224 return;
225 flush_map((df && !df->next) ? df->address : 0); 223 flush_map((df && !df->next) ? df->address : 0);
226 for (; df; df = next_df) { 224 for (; df; df = next_df) {
227 next_df = df->next; 225 next_df = df->next;
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 4b2e844c15a7..33340bd1e328 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -71,8 +71,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
71 nd->start = nd->end; 71 nd->start = nd->end;
72 } 72 }
73 if (nd->end > end) { 73 if (nd->end > end) {
74 if (!(end & 0xfff))
75 end--;
76 nd->end = end; 74 nd->end = end;
77 if (nd->start > nd->end) 75 if (nd->start > nd->end)
78 nd->start = nd->end; 76 nd->start = nd->end;
@@ -166,8 +164,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
166 if (nd->end < end) 164 if (nd->end < end)
167 nd->end = end; 165 nd->end = end;
168 } 166 }
169 if (!(nd->end & 0xfff))
170 nd->end--;
171 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 167 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
172 nd->start, nd->end); 168 nd->start, nd->end);
173} 169}
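With the two decrements gone, SRAT node ends are exclusive like everywhere else in this series. A hedged example for an affinity entry covering the first 4 GiB:

	/*  nd->start = 0x000000000, nd->end = 0x100000000  (exclusive),
	 *  so (nd->end - nd->start) >> PAGE_SHIFT is an exact page
	 *  count, with no special-casing of page-aligned ends. */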
@@ -203,7 +199,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
203 if (cpu_to_node[i] == NUMA_NO_NODE) 199 if (cpu_to_node[i] == NUMA_NO_NODE)
204 continue; 200 continue;
205 if (!node_isset(cpu_to_node[i], nodes_parsed)) 201 if (!node_isset(cpu_to_node[i], nodes_parsed))
206 cpu_to_node[i] = NUMA_NO_NODE; 202 numa_set_node(i, NUMA_NO_NODE);
207 } 203 }
208 numa_init_array(); 204 numa_init_array();
209 return 0; 205 return 0;
diff --git a/arch/x86_64/oprofile/Kconfig b/arch/x86_64/oprofile/Kconfig
index 5ade19801b97..d8a84088471a 100644
--- a/arch/x86_64/oprofile/Kconfig
+++ b/arch/x86_64/oprofile/Kconfig
@@ -1,7 +1,3 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING 1config PROFILING
6 bool "Profiling support (EXPERIMENTAL)" 2 bool "Profiling support (EXPERIMENTAL)"
7 help 3 help
@@ -19,5 +15,3 @@ config OPROFILE
19 15
20 If unsure, say N. 16 If unsure, say N.
21 17
22endmenu
23