aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig58
-rw-r--r--arch/x86_64/Makefile12
-rw-r--r--arch/x86_64/boot/setup.S5
-rw-r--r--arch/x86_64/defconfig70
-rw-r--r--arch/x86_64/ia32/ia32_aout.c8
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c4
-rw-r--r--arch/x86_64/ia32/ia32_signal.c10
-rw-r--r--arch/x86_64/ia32/ptrace32.c2
-rw-r--r--arch/x86_64/ia32/syscall32.c2
-rw-r--r--arch/x86_64/kernel/apic.c104
-rw-r--r--arch/x86_64/kernel/crash.c69
-rw-r--r--arch/x86_64/kernel/e820.c18
-rw-r--r--arch/x86_64/kernel/early-quirks.c23
-rw-r--r--arch/x86_64/kernel/early_printk.c2
-rw-r--r--arch/x86_64/kernel/entry.S46
-rw-r--r--arch/x86_64/kernel/genapic.c9
-rw-r--r--arch/x86_64/kernel/genapic_flat.c2
-rw-r--r--arch/x86_64/kernel/head64.c6
-rw-r--r--arch/x86_64/kernel/i387.c7
-rw-r--r--arch/x86_64/kernel/i8259.c10
-rw-r--r--arch/x86_64/kernel/io_apic.c447
-rw-r--r--arch/x86_64/kernel/irq.c4
-rw-r--r--arch/x86_64/kernel/kprobes.c2
-rw-r--r--arch/x86_64/kernel/mce.c9
-rw-r--r--arch/x86_64/kernel/mce_amd.c4
-rw-r--r--arch/x86_64/kernel/module.c5
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c38
-rw-r--r--arch/x86_64/kernel/pci-calgary.c260
-rw-r--r--arch/x86_64/kernel/pci-dma.c5
-rw-r--r--arch/x86_64/kernel/pci-gart.c3
-rw-r--r--arch/x86_64/kernel/process.c52
-rw-r--r--arch/x86_64/kernel/setup.c24
-rw-r--r--arch/x86_64/kernel/smp.c8
-rw-r--r--arch/x86_64/kernel/smpboot.c27
-rw-r--r--arch/x86_64/kernel/time.c17
-rw-r--r--arch/x86_64/kernel/traps.c95
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S22
-rw-r--r--arch/x86_64/kernel/vsyscall.c50
-rw-r--r--arch/x86_64/lib/csum-partial.c11
-rw-r--r--arch/x86_64/lib/csum-wrappers.c37
-rw-r--r--arch/x86_64/lib/delay.c4
-rw-r--r--arch/x86_64/mm/fault.c10
-rw-r--r--arch/x86_64/mm/init.c31
-rw-r--r--arch/x86_64/mm/pageattr.c58
-rw-r--r--arch/x86_64/mm/srat.c4
-rw-r--r--arch/x86_64/pci/Makefile2
-rw-r--r--arch/x86_64/pci/mmconfig.c37
48 files changed, 1064 insertions, 671 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 010d2265f1cf..d4275537b25b 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -96,6 +96,19 @@ config AUDIT_ARCH
96 bool 96 bool
97 default y 97 default y
98 98
99config GENERIC_BUG
100 bool
101 default y
102 depends on BUG
103
104config ARCH_HAS_ILOG2_U32
105 bool
106 default n
107
108config ARCH_HAS_ILOG2_U64
109 bool
110 default n
111
99source "init/Kconfig" 112source "init/Kconfig"
100 113
101 114
@@ -122,7 +135,7 @@ endchoice
122 135
123choice 136choice
124 prompt "Processor family" 137 prompt "Processor family"
125 default MK8 138 default GENERIC_CPU
126 139
127config MK8 140config MK8
128 bool "AMD-Opteron/Athlon64" 141 bool "AMD-Opteron/Athlon64"
@@ -130,16 +143,31 @@ config MK8
130 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. 143 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
131 144
132config MPSC 145config MPSC
133 bool "Intel EM64T" 146 bool "Intel P4 / older Netburst based Xeon"
134 help 147 help
135 Optimize for Intel Pentium 4 and Xeon CPUs with Intel 148 Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
136 Extended Memory 64 Technology(EM64T). For details see 149 with Intel Extended Memory 64 Technology(EM64T). For details see
137 <http://www.intel.com/technology/64bitextensions/>. 150 <http://www.intel.com/technology/64bitextensions/>.
151 Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
152 Netburst core and shouldn't use this option. You can distinguish them
153 using the cpu family field
154 in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one
155 (this rule only applies to systems that support EM64T)
156
157config MCORE2
158 bool "Intel Core2 / newer Xeon"
159 help
160 Optimize for Intel Core2 and newer Xeons (51xx).
161 You can distinguish the newer Xeons from the older ones using
162 the cpu family field in /proc/cpuinfo. 15 is an older Xeon
163 (use CONFIG_MPSC then), 6 is a newer one. This rule only
164 applies to CPUs that support EM64T.
138 165
139config GENERIC_CPU 166config GENERIC_CPU
140 bool "Generic-x86-64" 167 bool "Generic-x86-64"
141 help 168 help
142 Generic x86-64 CPU. 169 Generic x86-64 CPU.
170 Runs equally well on all x86-64 CPUs.
143 171
144endchoice 172endchoice
145 173
@@ -149,12 +177,12 @@ endchoice
149config X86_L1_CACHE_BYTES 177config X86_L1_CACHE_BYTES
150 int 178 int
151 default "128" if GENERIC_CPU || MPSC 179 default "128" if GENERIC_CPU || MPSC
152 default "64" if MK8 180 default "64" if MK8 || MCORE2
153 181
154config X86_L1_CACHE_SHIFT 182config X86_L1_CACHE_SHIFT
155 int 183 int
156 default "7" if GENERIC_CPU || MPSC 184 default "7" if GENERIC_CPU || MPSC
157 default "6" if MK8 185 default "6" if MK8 || MCORE2
158 186
159config X86_INTERNODE_CACHE_BYTES 187config X86_INTERNODE_CACHE_BYTES
160 int 188 int
@@ -344,11 +372,6 @@ config ARCH_DISCONTIGMEM_ENABLE
344 depends on NUMA 372 depends on NUMA
345 default y 373 default y
346 374
347
348config ARCH_DISCONTIGMEM_ENABLE
349 def_bool y
350 depends on NUMA
351
352config ARCH_DISCONTIGMEM_DEFAULT 375config ARCH_DISCONTIGMEM_DEFAULT
353 def_bool y 376 def_bool y
354 depends on NUMA 377 depends on NUMA
@@ -455,6 +478,17 @@ config CALGARY_IOMMU
455 Normally the kernel will make the right choice by itself. 478 Normally the kernel will make the right choice by itself.
456 If unsure, say Y. 479 If unsure, say Y.
457 480
481config CALGARY_IOMMU_ENABLED_BY_DEFAULT
482 bool "Should Calgary be enabled by default?"
483 default y
484 depends on CALGARY_IOMMU
485 help
486 Should Calgary be enabled by default? If you choose 'y', Calgary
487 will be used (if it exists). If you choose 'n', Calgary will not be
488 used even if it exists. If you choose 'n' and would like to use
489 Calgary anyway, pass 'iommu=calgary' on the kernel command line.
490 If unsure, say Y.
491
458# need this always selected by IOMMU for the VIA workaround 492# need this always selected by IOMMU for the VIA workaround
459config SWIOTLB 493config SWIOTLB
460 bool 494 bool
@@ -550,7 +584,7 @@ config SECCOMP
550 If unsure, say Y. Only embedded should say N here. 584 If unsure, say Y. Only embedded should say N here.
551 585
552config CC_STACKPROTECTOR 586config CC_STACKPROTECTOR
553 bool "Enable -fstack-protector buffer overflow detection (EXPRIMENTAL)" 587 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
554 depends on EXPERIMENTAL 588 depends on EXPERIMENTAL
555 help 589 help
556 This option turns on the -fstack-protector GCC feature. This 590 This option turns on the -fstack-protector GCC feature. This
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 1c0f18d4f887..b471b8550d03 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -30,6 +30,10 @@ cflags-y :=
30cflags-kernel-y := 30cflags-kernel-y :=
31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) 31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) 32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
33# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
34# will eventually. Use -mtune=generic as fallback
35cflags-$(CONFIG_MCORE2) += \
36 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
33cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) 37cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
34 38
35cflags-y += -m64 39cflags-y += -m64
@@ -54,6 +58,10 @@ endif
54cflags-y += $(call cc-option,-funit-at-a-time) 58cflags-y += $(call cc-option,-funit-at-a-time)
55# prevent gcc from generating any FP code by mistake 59# prevent gcc from generating any FP code by mistake
56cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 60cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
61# this works around some issues with generating unwind tables in older gccs
62# newer gccs do it by default
63cflags-y += -maccumulate-outgoing-args
64
57# do binutils support CFI? 65# do binutils support CFI?
58cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) 66cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
59AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) 67AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
@@ -62,8 +70,8 @@ AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
62cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) 70cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
63AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) 71AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
64 72
65cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector ) 73cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
66cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector-all ) 74cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
67 75
68CFLAGS += $(cflags-y) 76CFLAGS += $(cflags-y)
69CFLAGS_KERNEL += $(cflags-kernel-y) 77CFLAGS_KERNEL += $(cflags-kernel-y)
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index c3bfd223ab49..770940cc0108 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -836,13 +836,12 @@ gdt:
836 .word 0x9200 # data read/write 836 .word 0x9200 # data read/write
837 .word 0x00CF # granularity = 4096, 386 837 .word 0x00CF # granularity = 4096, 386
838 # (+5th nibble of limit) 838 # (+5th nibble of limit)
839gdt_end:
839idt_48: 840idt_48:
840 .word 0 # idt limit = 0 841 .word 0 # idt limit = 0
841 .word 0, 0 # idt base = 0L 842 .word 0, 0 # idt base = 0L
842gdt_48: 843gdt_48:
843 .word 0x8000 # gdt limit=2048, 844 .word gdt_end-gdt-1 # gdt limit
844 # 256 GDT entries
845
846 .word 0, 0 # gdt base (filled in later) 845 .word 0, 0 # gdt base (filled in later)
847 846
848# Include video setup & detection code 847# Include video setup & detection code
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 47bfba6e9dc4..1a1c6a1a299b 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.19-rc1 3# Linux kernel version: 2.6.19-git14
4# Thu Oct 5 13:04:43 2006 4# Sat Dec 9 21:23:09 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -22,6 +22,9 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
22CONFIG_ARCH_POPULATES_NODE_MAP=y 22CONFIG_ARCH_POPULATES_NODE_MAP=y
23CONFIG_DMI=y 23CONFIG_DMI=y
24CONFIG_AUDIT_ARCH=y 24CONFIG_AUDIT_ARCH=y
25CONFIG_GENERIC_BUG=y
26# CONFIG_ARCH_HAS_ILOG2_U32 is not set
27# CONFIG_ARCH_HAS_ILOG2_U64 is not set
25CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 28CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
26 29
27# 30#
@@ -47,13 +50,14 @@ CONFIG_POSIX_MQUEUE=y
47CONFIG_IKCONFIG=y 50CONFIG_IKCONFIG=y
48CONFIG_IKCONFIG_PROC=y 51CONFIG_IKCONFIG_PROC=y
49# CONFIG_CPUSETS is not set 52# CONFIG_CPUSETS is not set
53CONFIG_SYSFS_DEPRECATED=y
50# CONFIG_RELAY is not set 54# CONFIG_RELAY is not set
51CONFIG_INITRAMFS_SOURCE="" 55CONFIG_INITRAMFS_SOURCE=""
52CONFIG_CC_OPTIMIZE_FOR_SIZE=y 56CONFIG_CC_OPTIMIZE_FOR_SIZE=y
53CONFIG_SYSCTL=y 57CONFIG_SYSCTL=y
54# CONFIG_EMBEDDED is not set 58# CONFIG_EMBEDDED is not set
55CONFIG_UID16=y 59CONFIG_UID16=y
56# CONFIG_SYSCTL_SYSCALL is not set 60CONFIG_SYSCTL_SYSCALL=y
57CONFIG_KALLSYMS=y 61CONFIG_KALLSYMS=y
58CONFIG_KALLSYMS_ALL=y 62CONFIG_KALLSYMS_ALL=y
59# CONFIG_KALLSYMS_EXTRA_PASS is not set 63# CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -87,9 +91,7 @@ CONFIG_STOP_MACHINE=y
87# Block layer 91# Block layer
88# 92#
89CONFIG_BLOCK=y 93CONFIG_BLOCK=y
90CONFIG_LBD=y
91# CONFIG_BLK_DEV_IO_TRACE is not set 94# CONFIG_BLK_DEV_IO_TRACE is not set
92# CONFIG_LSF is not set
93 95
94# 96#
95# IO Schedulers 97# IO Schedulers
@@ -111,10 +113,11 @@ CONFIG_X86_PC=y
111# CONFIG_X86_VSMP is not set 113# CONFIG_X86_VSMP is not set
112# CONFIG_MK8 is not set 114# CONFIG_MK8 is not set
113# CONFIG_MPSC is not set 115# CONFIG_MPSC is not set
114CONFIG_GENERIC_CPU=y 116CONFIG_MCORE2=y
115CONFIG_X86_L1_CACHE_BYTES=128 117# CONFIG_GENERIC_CPU is not set
116CONFIG_X86_L1_CACHE_SHIFT=7 118CONFIG_X86_L1_CACHE_BYTES=64
117CONFIG_X86_INTERNODE_CACHE_BYTES=128 119CONFIG_X86_L1_CACHE_SHIFT=6
120CONFIG_X86_INTERNODE_CACHE_BYTES=64
118CONFIG_X86_TSC=y 121CONFIG_X86_TSC=y
119CONFIG_X86_GOOD_APIC=y 122CONFIG_X86_GOOD_APIC=y
120# CONFIG_MICROCODE is not set 123# CONFIG_MICROCODE is not set
@@ -170,6 +173,7 @@ CONFIG_SECCOMP=y
170# CONFIG_CC_STACKPROTECTOR is not set 173# CONFIG_CC_STACKPROTECTOR is not set
171# CONFIG_HZ_100 is not set 174# CONFIG_HZ_100 is not set
172CONFIG_HZ_250=y 175CONFIG_HZ_250=y
176# CONFIG_HZ_300 is not set
173# CONFIG_HZ_1000 is not set 177# CONFIG_HZ_1000 is not set
174CONFIG_HZ=250 178CONFIG_HZ=250
175# CONFIG_REORDER is not set 179# CONFIG_REORDER is not set
@@ -322,6 +326,7 @@ CONFIG_INET_TCP_DIAG=y
322# CONFIG_TCP_CONG_ADVANCED is not set 326# CONFIG_TCP_CONG_ADVANCED is not set
323CONFIG_TCP_CONG_CUBIC=y 327CONFIG_TCP_CONG_CUBIC=y
324CONFIG_DEFAULT_TCP_CONG="cubic" 328CONFIG_DEFAULT_TCP_CONG="cubic"
329# CONFIG_TCP_MD5SIG is not set
325CONFIG_IPV6=y 330CONFIG_IPV6=y
326# CONFIG_IPV6_PRIVACY is not set 331# CONFIG_IPV6_PRIVACY is not set
327# CONFIG_IPV6_ROUTER_PREF is not set 332# CONFIG_IPV6_ROUTER_PREF is not set
@@ -335,8 +340,8 @@ CONFIG_IPV6=y
335# CONFIG_INET6_XFRM_MODE_TUNNEL is not set 340# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
336# CONFIG_INET6_XFRM_MODE_BEET is not set 341# CONFIG_INET6_XFRM_MODE_BEET is not set
337# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set 342# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
343CONFIG_IPV6_SIT=y
338# CONFIG_IPV6_TUNNEL is not set 344# CONFIG_IPV6_TUNNEL is not set
339# CONFIG_IPV6_SUBTREES is not set
340# CONFIG_IPV6_MULTIPLE_TABLES is not set 345# CONFIG_IPV6_MULTIPLE_TABLES is not set
341# CONFIG_NETWORK_SECMARK is not set 346# CONFIG_NETWORK_SECMARK is not set
342# CONFIG_NETFILTER is not set 347# CONFIG_NETFILTER is not set
@@ -438,6 +443,13 @@ CONFIG_BLK_DEV_INITRD=y
438# CONFIG_ATA_OVER_ETH is not set 443# CONFIG_ATA_OVER_ETH is not set
439 444
440# 445#
446# Misc devices
447#
448# CONFIG_IBM_ASM is not set
449# CONFIG_SGI_IOC4 is not set
450# CONFIG_TIFM_CORE is not set
451
452#
441# ATA/ATAPI/MFM/RLL support 453# ATA/ATAPI/MFM/RLL support
442# 454#
443CONFIG_IDE=y 455CONFIG_IDE=y
@@ -506,6 +518,7 @@ CONFIG_IDEDMA_AUTO=y
506# 518#
507# CONFIG_RAID_ATTRS is not set 519# CONFIG_RAID_ATTRS is not set
508CONFIG_SCSI=y 520CONFIG_SCSI=y
521# CONFIG_SCSI_TGT is not set
509CONFIG_SCSI_NETLINK=y 522CONFIG_SCSI_NETLINK=y
510# CONFIG_SCSI_PROC_FS is not set 523# CONFIG_SCSI_PROC_FS is not set
511 524
@@ -526,6 +539,7 @@ CONFIG_CHR_DEV_SG=y
526# CONFIG_SCSI_MULTI_LUN is not set 539# CONFIG_SCSI_MULTI_LUN is not set
527CONFIG_SCSI_CONSTANTS=y 540CONFIG_SCSI_CONSTANTS=y
528# CONFIG_SCSI_LOGGING is not set 541# CONFIG_SCSI_LOGGING is not set
542# CONFIG_SCSI_SCAN_ASYNC is not set
529 543
530# 544#
531# SCSI Transports 545# SCSI Transports
@@ -579,6 +593,7 @@ CONFIG_MEGARAID_SAS=y
579# CONFIG_SCSI_DC395x is not set 593# CONFIG_SCSI_DC395x is not set
580# CONFIG_SCSI_DC390T is not set 594# CONFIG_SCSI_DC390T is not set
581# CONFIG_SCSI_DEBUG is not set 595# CONFIG_SCSI_DEBUG is not set
596# CONFIG_SCSI_SRP is not set
582 597
583# 598#
584# Serial ATA (prod) and Parallel ATA (experimental) drivers 599# Serial ATA (prod) and Parallel ATA (experimental) drivers
@@ -617,6 +632,7 @@ CONFIG_SATA_INTEL_COMBINED=y
617# CONFIG_PATA_IT821X is not set 632# CONFIG_PATA_IT821X is not set
618# CONFIG_PATA_JMICRON is not set 633# CONFIG_PATA_JMICRON is not set
619# CONFIG_PATA_TRIFLEX is not set 634# CONFIG_PATA_TRIFLEX is not set
635# CONFIG_PATA_MARVELL is not set
620# CONFIG_PATA_MPIIX is not set 636# CONFIG_PATA_MPIIX is not set
621# CONFIG_PATA_OLDPIIX is not set 637# CONFIG_PATA_OLDPIIX is not set
622# CONFIG_PATA_NETCELL is not set 638# CONFIG_PATA_NETCELL is not set
@@ -788,6 +804,7 @@ CONFIG_BNX2=y
788CONFIG_S2IO=m 804CONFIG_S2IO=m
789# CONFIG_S2IO_NAPI is not set 805# CONFIG_S2IO_NAPI is not set
790# CONFIG_MYRI10GE is not set 806# CONFIG_MYRI10GE is not set
807# CONFIG_NETXEN_NIC is not set
791 808
792# 809#
793# Token Ring devices 810# Token Ring devices
@@ -920,10 +937,6 @@ CONFIG_RTC=y
920# CONFIG_DTLK is not set 937# CONFIG_DTLK is not set
921# CONFIG_R3964 is not set 938# CONFIG_R3964 is not set
922# CONFIG_APPLICOM is not set 939# CONFIG_APPLICOM is not set
923
924#
925# Ftape, the floppy tape device driver
926#
927CONFIG_AGP=y 940CONFIG_AGP=y
928CONFIG_AGP_AMD64=y 941CONFIG_AGP_AMD64=y
929CONFIG_AGP_INTEL=y 942CONFIG_AGP_INTEL=y
@@ -1008,6 +1021,7 @@ CONFIG_I2C_ISA=m
1008# 1021#
1009# Dallas's 1-wire bus 1022# Dallas's 1-wire bus
1010# 1023#
1024# CONFIG_W1 is not set
1011 1025
1012# 1026#
1013# Hardware Monitoring support 1027# Hardware Monitoring support
@@ -1059,12 +1073,6 @@ CONFIG_SENSORS_SMSC47B397=m
1059# CONFIG_HWMON_DEBUG_CHIP is not set 1073# CONFIG_HWMON_DEBUG_CHIP is not set
1060 1074
1061# 1075#
1062# Misc devices
1063#
1064# CONFIG_IBM_ASM is not set
1065# CONFIG_TIFM_CORE is not set
1066
1067#
1068# Multimedia devices 1076# Multimedia devices
1069# 1077#
1070# CONFIG_VIDEO_DEV is not set 1078# CONFIG_VIDEO_DEV is not set
@@ -1105,10 +1113,7 @@ CONFIG_SOUND=y
1105# Open Sound System 1113# Open Sound System
1106# 1114#
1107CONFIG_SOUND_PRIME=y 1115CONFIG_SOUND_PRIME=y
1108CONFIG_OSS_OBSOLETE_DRIVER=y
1109# CONFIG_SOUND_BT878 is not set 1116# CONFIG_SOUND_BT878 is not set
1110# CONFIG_SOUND_EMU10K1 is not set
1111# CONFIG_SOUND_FUSION is not set
1112# CONFIG_SOUND_ES1371 is not set 1117# CONFIG_SOUND_ES1371 is not set
1113CONFIG_SOUND_ICH=y 1118CONFIG_SOUND_ICH=y
1114# CONFIG_SOUND_TRIDENT is not set 1119# CONFIG_SOUND_TRIDENT is not set
@@ -1118,6 +1123,11 @@ CONFIG_SOUND_ICH=y
1118# CONFIG_SOUND_OSS is not set 1123# CONFIG_SOUND_OSS is not set
1119 1124
1120# 1125#
1126# HID Devices
1127#
1128CONFIG_HID=y
1129
1130#
1121# USB support 1131# USB support
1122# 1132#
1123CONFIG_USB_ARCH_HAS_HCD=y 1133CONFIG_USB_ARCH_HAS_HCD=y
@@ -1133,6 +1143,7 @@ CONFIG_USB_DEVICEFS=y
1133# CONFIG_USB_BANDWIDTH is not set 1143# CONFIG_USB_BANDWIDTH is not set
1134# CONFIG_USB_DYNAMIC_MINORS is not set 1144# CONFIG_USB_DYNAMIC_MINORS is not set
1135# CONFIG_USB_SUSPEND is not set 1145# CONFIG_USB_SUSPEND is not set
1146# CONFIG_USB_MULTITHREAD_PROBE is not set
1136# CONFIG_USB_OTG is not set 1147# CONFIG_USB_OTG is not set
1137 1148
1138# 1149#
@@ -1180,8 +1191,7 @@ CONFIG_USB_STORAGE=y
1180# USB Input Devices 1191# USB Input Devices
1181# 1192#
1182CONFIG_USB_HID=y 1193CONFIG_USB_HID=y
1183CONFIG_USB_HIDINPUT=y 1194# CONFIG_USB_HID_POWERBOOK is not set
1184# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1185# CONFIG_HID_FF is not set 1195# CONFIG_HID_FF is not set
1186# CONFIG_USB_HIDDEV is not set 1196# CONFIG_USB_HIDDEV is not set
1187# CONFIG_USB_AIPTEK is not set 1197# CONFIG_USB_AIPTEK is not set
@@ -1196,7 +1206,6 @@ CONFIG_USB_HIDINPUT=y
1196# CONFIG_USB_ATI_REMOTE2 is not set 1206# CONFIG_USB_ATI_REMOTE2 is not set
1197# CONFIG_USB_KEYSPAN_REMOTE is not set 1207# CONFIG_USB_KEYSPAN_REMOTE is not set
1198# CONFIG_USB_APPLETOUCH is not set 1208# CONFIG_USB_APPLETOUCH is not set
1199# CONFIG_USB_TRANCEVIBRATOR is not set
1200 1209
1201# 1210#
1202# USB Imaging devices 1211# USB Imaging devices
@@ -1211,6 +1220,7 @@ CONFIG_USB_HIDINPUT=y
1211# CONFIG_USB_KAWETH is not set 1220# CONFIG_USB_KAWETH is not set
1212# CONFIG_USB_PEGASUS is not set 1221# CONFIG_USB_PEGASUS is not set
1213# CONFIG_USB_RTL8150 is not set 1222# CONFIG_USB_RTL8150 is not set
1223# CONFIG_USB_USBNET_MII is not set
1214# CONFIG_USB_USBNET is not set 1224# CONFIG_USB_USBNET is not set
1215CONFIG_USB_MON=y 1225CONFIG_USB_MON=y
1216 1226
@@ -1242,6 +1252,7 @@ CONFIG_USB_MON=y
1242# CONFIG_USB_APPLEDISPLAY is not set 1252# CONFIG_USB_APPLEDISPLAY is not set
1243# CONFIG_USB_SISUSBVGA is not set 1253# CONFIG_USB_SISUSBVGA is not set
1244# CONFIG_USB_LD is not set 1254# CONFIG_USB_LD is not set
1255# CONFIG_USB_TRANCEVIBRATOR is not set
1245# CONFIG_USB_TEST is not set 1256# CONFIG_USB_TEST is not set
1246 1257
1247# 1258#
@@ -1318,6 +1329,7 @@ CONFIG_EXT3_FS=y
1318CONFIG_EXT3_FS_XATTR=y 1329CONFIG_EXT3_FS_XATTR=y
1319CONFIG_EXT3_FS_POSIX_ACL=y 1330CONFIG_EXT3_FS_POSIX_ACL=y
1320# CONFIG_EXT3_FS_SECURITY is not set 1331# CONFIG_EXT3_FS_SECURITY is not set
1332# CONFIG_EXT4DEV_FS is not set
1321CONFIG_JBD=y 1333CONFIG_JBD=y
1322# CONFIG_JBD_DEBUG is not set 1334# CONFIG_JBD_DEBUG is not set
1323CONFIG_FS_MBCACHE=y 1335CONFIG_FS_MBCACHE=y
@@ -1341,6 +1353,7 @@ CONFIG_DNOTIFY=y
1341# CONFIG_AUTOFS_FS is not set 1353# CONFIG_AUTOFS_FS is not set
1342CONFIG_AUTOFS4_FS=y 1354CONFIG_AUTOFS4_FS=y
1343# CONFIG_FUSE_FS is not set 1355# CONFIG_FUSE_FS is not set
1356CONFIG_GENERIC_ACL=y
1344 1357
1345# 1358#
1346# CD-ROM/DVD Filesystems 1359# CD-ROM/DVD Filesystems
@@ -1418,7 +1431,6 @@ CONFIG_SUNRPC=y
1418# CONFIG_CODA_FS is not set 1431# CONFIG_CODA_FS is not set
1419# CONFIG_AFS_FS is not set 1432# CONFIG_AFS_FS is not set
1420# CONFIG_9P_FS is not set 1433# CONFIG_9P_FS is not set
1421CONFIG_GENERIC_ACL=y
1422 1434
1423# 1435#
1424# Partition Types 1436# Partition Types
@@ -1473,6 +1485,7 @@ CONFIG_NLS_UTF8=y
1473# 1485#
1474# Distributed Lock Manager 1486# Distributed Lock Manager
1475# 1487#
1488# CONFIG_DLM is not set
1476 1489
1477# 1490#
1478# Instrumentation Support 1491# Instrumentation Support
@@ -1504,6 +1517,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
1504# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 1517# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1505# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set 1518# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
1506# CONFIG_DEBUG_KOBJECT is not set 1519# CONFIG_DEBUG_KOBJECT is not set
1520CONFIG_DEBUG_BUGVERBOSE=y
1507# CONFIG_DEBUG_INFO is not set 1521# CONFIG_DEBUG_INFO is not set
1508CONFIG_DEBUG_FS=y 1522CONFIG_DEBUG_FS=y
1509# CONFIG_DEBUG_VM is not set 1523# CONFIG_DEBUG_VM is not set
@@ -1512,6 +1526,7 @@ CONFIG_DEBUG_FS=y
1512CONFIG_UNWIND_INFO=y 1526CONFIG_UNWIND_INFO=y
1513CONFIG_STACK_UNWIND=y 1527CONFIG_STACK_UNWIND=y
1514# CONFIG_FORCED_INLINING is not set 1528# CONFIG_FORCED_INLINING is not set
1529# CONFIG_HEADERS_CHECK is not set
1515# CONFIG_RCU_TORTURE_TEST is not set 1530# CONFIG_RCU_TORTURE_TEST is not set
1516# CONFIG_LKDTM is not set 1531# CONFIG_LKDTM is not set
1517# CONFIG_DEBUG_RODATA is not set 1532# CONFIG_DEBUG_RODATA is not set
@@ -1533,6 +1548,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
1533# 1548#
1534# Library routines 1549# Library routines
1535# 1550#
1551CONFIG_BITREVERSE=y
1536# CONFIG_CRC_CCITT is not set 1552# CONFIG_CRC_CCITT is not set
1537# CONFIG_CRC16 is not set 1553# CONFIG_CRC16 is not set
1538CONFIG_CRC32=y 1554CONFIG_CRC32=y
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 396d3c100011..be87df506f39 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -272,7 +272,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
272 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && 272 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
273 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || 273 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
274 N_TRSIZE(ex) || N_DRSIZE(ex) || 274 N_TRSIZE(ex) || N_DRSIZE(ex) ||
275 i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 275 i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
276 return -ENOEXEC; 276 return -ENOEXEC;
277 } 277 }
278 278
@@ -357,7 +357,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
357 { 357 {
358 printk(KERN_WARNING 358 printk(KERN_WARNING
359 "fd_offset is not page aligned. Please convert program: %s\n", 359 "fd_offset is not page aligned. Please convert program: %s\n",
360 bprm->file->f_dentry->d_name.name); 360 bprm->file->f_path.dentry->d_name.name);
361 error_time = jiffies; 361 error_time = jiffies;
362 } 362 }
363#endif 363#endif
@@ -440,7 +440,7 @@ static int load_aout_library(struct file *file)
440 int retval; 440 int retval;
441 struct exec ex; 441 struct exec ex;
442 442
443 inode = file->f_dentry->d_inode; 443 inode = file->f_path.dentry->d_inode;
444 444
445 retval = -ENOEXEC; 445 retval = -ENOEXEC;
446 error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); 446 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
@@ -471,7 +471,7 @@ static int load_aout_library(struct file *file)
471 { 471 {
472 printk(KERN_WARNING 472 printk(KERN_WARNING
473 "N_TXTOFF is not page aligned. Please convert library: %s\n", 473 "N_TXTOFF is not page aligned. Please convert library: %s\n",
474 file->f_dentry->d_name.name); 474 file->f_path.dentry->d_name.name);
475 error_time = jiffies; 475 error_time = jiffies;
476 } 476 }
477#endif 477#endif
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 82ef182de6ae..543ef4f405e9 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -305,8 +305,6 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
305#undef MODULE_DESCRIPTION 305#undef MODULE_DESCRIPTION
306#undef MODULE_AUTHOR 306#undef MODULE_AUTHOR
307 307
308#define elf_addr_t __u32
309
310static void elf32_init(struct pt_regs *); 308static void elf32_init(struct pt_regs *);
311 309
312#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 310#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
@@ -351,7 +349,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
351 bprm->loader += stack_base; 349 bprm->loader += stack_base;
352 bprm->exec += stack_base; 350 bprm->exec += stack_base;
353 351
354 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 352 mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
355 if (!mpnt) 353 if (!mpnt)
356 return -ENOMEM; 354 return -ENOMEM;
357 355
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index a6ba9951e86c..ff499ef2a1ba 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -579,6 +579,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
579 regs->rsp = (unsigned long) frame; 579 regs->rsp = (unsigned long) frame;
580 regs->rip = (unsigned long) ka->sa.sa_handler; 580 regs->rip = (unsigned long) ka->sa.sa_handler;
581 581
582 /* Make -mregparm=3 work */
583 regs->rax = sig;
584 regs->rdx = (unsigned long) &frame->info;
585 regs->rcx = (unsigned long) &frame->uc;
586
587 /* Make -mregparm=3 work */
588 regs->rax = sig;
589 regs->rdx = (unsigned long) &frame->info;
590 regs->rcx = (unsigned long) &frame->uc;
591
582 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 592 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
583 asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 593 asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
584 594
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 3a7561d4703e..04566fe5de49 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -244,6 +244,8 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
244 case PTRACE_DETACH: 244 case PTRACE_DETACH:
245 case PTRACE_SYSCALL: 245 case PTRACE_SYSCALL:
246 case PTRACE_SETOPTIONS: 246 case PTRACE_SETOPTIONS:
247 case PTRACE_SET_THREAD_AREA:
248 case PTRACE_GET_THREAD_AREA:
247 return sys_ptrace(request, pid, addr, data); 249 return sys_ptrace(request, pid, addr, data);
248 250
249 default: 251 default:
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3a01329473ab..3e5ed20cba45 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
49 struct mm_struct *mm = current->mm; 49 struct mm_struct *mm = current->mm;
50 int ret; 50 int ret;
51 51
52 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 52 vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
53 if (!vma) 53 if (!vma)
54 return -ENOMEM; 54 return -ENOMEM;
55 55
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed942b2..124b2d27b4ac 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
25#include <linux/kernel_stat.h> 25#include <linux/kernel_stat.h>
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/ioport.h>
28 29
29#include <asm/atomic.h> 30#include <asm/atomic.h>
30#include <asm/smp.h> 31#include <asm/smp.h>
@@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata;
45 46
46int disable_apic_timer __initdata; 47int disable_apic_timer __initdata;
47 48
49static struct resource *ioapic_resources;
50static struct resource lapic_resource = {
51 .name = "Local APIC",
52 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
53};
54
48/* 55/*
49 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as 56 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
50 * IPIs in place of local APIC timers 57 * IPIs in place of local APIC timers
@@ -133,7 +140,6 @@ void clear_local_APIC(void)
133 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 140 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
134 if (maxlvt >= 4) 141 if (maxlvt >= 4)
135 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 142 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
136 v = GET_APIC_VERSION(apic_read(APIC_LVR));
137 apic_write(APIC_ESR, 0); 143 apic_write(APIC_ESR, 0);
138 apic_read(APIC_ESR); 144 apic_read(APIC_ESR);
139} 145}
@@ -452,23 +458,30 @@ static struct {
452static int lapic_suspend(struct sys_device *dev, pm_message_t state) 458static int lapic_suspend(struct sys_device *dev, pm_message_t state)
453{ 459{
454 unsigned long flags; 460 unsigned long flags;
461 int maxlvt;
455 462
456 if (!apic_pm_state.active) 463 if (!apic_pm_state.active)
457 return 0; 464 return 0;
458 465
466 maxlvt = get_maxlvt();
467
459 apic_pm_state.apic_id = apic_read(APIC_ID); 468 apic_pm_state.apic_id = apic_read(APIC_ID);
460 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 469 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
461 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 470 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
462 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 471 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
463 apic_pm_state.apic_spiv = apic_read(APIC_SPIV); 472 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
464 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); 473 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
465 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); 474 if (maxlvt >= 4)
475 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
466 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); 476 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
467 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); 477 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
468 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 478 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
469 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 479 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
470 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 480 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
471 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 481#ifdef CONFIG_X86_MCE_INTEL
482 if (maxlvt >= 5)
483 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
484#endif
472 local_irq_save(flags); 485 local_irq_save(flags);
473 disable_local_APIC(); 486 disable_local_APIC();
474 local_irq_restore(flags); 487 local_irq_restore(flags);
@@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev)
479{ 492{
480 unsigned int l, h; 493 unsigned int l, h;
481 unsigned long flags; 494 unsigned long flags;
495 int maxlvt;
482 496
483 if (!apic_pm_state.active) 497 if (!apic_pm_state.active)
484 return 0; 498 return 0;
485 499
500 maxlvt = get_maxlvt();
501
486 local_irq_save(flags); 502 local_irq_save(flags);
487 rdmsr(MSR_IA32_APICBASE, l, h); 503 rdmsr(MSR_IA32_APICBASE, l, h);
488 l &= ~MSR_IA32_APICBASE_BASE; 504 l &= ~MSR_IA32_APICBASE_BASE;
@@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev)
496 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 512 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
497 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 513 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
498 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 514 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
499 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 515#ifdef CONFIG_X86_MCE_INTEL
500 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); 516 if (maxlvt >= 5)
517 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
518#endif
519 if (maxlvt >= 4)
520 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
501 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); 521 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
502 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); 522 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
503 apic_write(APIC_TMICT, apic_pm_state.apic_tmict); 523 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
@@ -585,6 +605,64 @@ static int __init detect_init_APIC (void)
585 return 0; 605 return 0;
586} 606}
587 607
608#ifdef CONFIG_X86_IO_APIC
609static struct resource * __init ioapic_setup_resources(void)
610{
611#define IOAPIC_RESOURCE_NAME_SIZE 11
612 unsigned long n;
613 struct resource *res;
614 char *mem;
615 int i;
616
617 if (nr_ioapics <= 0)
618 return NULL;
619
620 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
621 n *= nr_ioapics;
622
623 mem = alloc_bootmem(n);
624 res = (void *)mem;
625
626 if (mem != NULL) {
627 memset(mem, 0, n);
628 mem += sizeof(struct resource) * nr_ioapics;
629
630 for (i = 0; i < nr_ioapics; i++) {
631 res[i].name = mem;
632 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
633 sprintf(mem, "IOAPIC %u", i);
634 mem += IOAPIC_RESOURCE_NAME_SIZE;
635 }
636 }
637
638 ioapic_resources = res;
639
640 return res;
641}
642
643static int __init ioapic_insert_resources(void)
644{
645 int i;
646 struct resource *r = ioapic_resources;
647
648 if (!r) {
649 printk("IO APIC resources could be not be allocated.\n");
650 return -1;
651 }
652
653 for (i = 0; i < nr_ioapics; i++) {
654 insert_resource(&iomem_resource, r);
655 r++;
656 }
657
658 return 0;
659}
660
661/* Insert the IO APIC resources after PCI initialization has occured to handle
662 * IO APICS that are mapped in on a BAR in PCI space. */
663late_initcall(ioapic_insert_resources);
664#endif
665
588void __init init_apic_mappings(void) 666void __init init_apic_mappings(void)
589{ 667{
590 unsigned long apic_phys; 668 unsigned long apic_phys;
@@ -604,6 +682,11 @@ void __init init_apic_mappings(void)
604 apic_mapped = 1; 682 apic_mapped = 1;
605 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); 683 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
606 684
685 /* Put local APIC into the resource map. */
686 lapic_resource.start = apic_phys;
687 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
688 insert_resource(&iomem_resource, &lapic_resource);
689
607 /* 690 /*
608 * Fetch the APIC ID of the BSP in case we have a 691 * Fetch the APIC ID of the BSP in case we have a
609 * default configuration (or the MP table is broken). 692 * default configuration (or the MP table is broken).
@@ -613,7 +696,9 @@ void __init init_apic_mappings(void)
613 { 696 {
614 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 697 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
615 int i; 698 int i;
699 struct resource *ioapic_res;
616 700
701 ioapic_res = ioapic_setup_resources();
617 for (i = 0; i < nr_ioapics; i++) { 702 for (i = 0; i < nr_ioapics; i++) {
618 if (smp_found_config) { 703 if (smp_found_config) {
619 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 704 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +710,12 @@ void __init init_apic_mappings(void)
625 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", 710 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
626 __fix_to_virt(idx), ioapic_phys); 711 __fix_to_virt(idx), ioapic_phys);
627 idx++; 712 idx++;
713
714 if (ioapic_res != NULL) {
715 ioapic_res->start = ioapic_phys;
716 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
717 ioapic_res++;
718 }
628 } 719 }
629 } 720 }
630} 721}
@@ -644,10 +735,9 @@ void __init init_apic_mappings(void)
644 735
645static void __setup_APIC_LVTT(unsigned int clocks) 736static void __setup_APIC_LVTT(unsigned int clocks)
646{ 737{
647 unsigned int lvtt_value, tmp_value, ver; 738 unsigned int lvtt_value, tmp_value;
648 int cpu = smp_processor_id(); 739 int cpu = smp_processor_id();
649 740
650 ver = GET_APIC_VERSION(apic_read(APIC_LVR));
651 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; 741 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
652 742
653 if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) 743 if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 3525f884af82..95a7a2c13131 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -28,71 +28,6 @@
28/* This keeps a track of which one is crashing cpu. */ 28/* This keeps a track of which one is crashing cpu. */
29static int crashing_cpu; 29static int crashing_cpu;
30 30
31static u32 *append_elf_note(u32 *buf, char *name, unsigned type,
32 void *data, size_t data_len)
33{
34 struct elf_note note;
35
36 note.n_namesz = strlen(name) + 1;
37 note.n_descsz = data_len;
38 note.n_type = type;
39 memcpy(buf, &note, sizeof(note));
40 buf += (sizeof(note) +3)/4;
41 memcpy(buf, name, note.n_namesz);
42 buf += (note.n_namesz + 3)/4;
43 memcpy(buf, data, note.n_descsz);
44 buf += (note.n_descsz + 3)/4;
45
46 return buf;
47}
48
49static void final_note(u32 *buf)
50{
51 struct elf_note note;
52
53 note.n_namesz = 0;
54 note.n_descsz = 0;
55 note.n_type = 0;
56 memcpy(buf, &note, sizeof(note));
57}
58
59static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
60{
61 struct elf_prstatus prstatus;
62 u32 *buf;
63
64 if ((cpu < 0) || (cpu >= NR_CPUS))
65 return;
66
67 /* Using ELF notes here is opportunistic.
68 * I need a well defined structure format
69 * for the data I pass, and I need tags
70 * on the data to indicate what information I have
71 * squirrelled away. ELF notes happen to provide
72 * all of that, no need to invent something new.
73 */
74
75 buf = (u32*)per_cpu_ptr(crash_notes, cpu);
76
77 if (!buf)
78 return;
79
80 memset(&prstatus, 0, sizeof(prstatus));
81 prstatus.pr_pid = current->pid;
82 elf_core_copy_regs(&prstatus.pr_reg, regs);
83 buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
84 sizeof(prstatus));
85 final_note(buf);
86}
87
88static void crash_save_self(struct pt_regs *regs)
89{
90 int cpu;
91
92 cpu = smp_processor_id();
93 crash_save_this_cpu(regs, cpu);
94}
95
96#ifdef CONFIG_SMP 31#ifdef CONFIG_SMP
97static atomic_t waiting_for_crash_ipi; 32static atomic_t waiting_for_crash_ipi;
98 33
@@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self,
117 return NOTIFY_STOP; 52 return NOTIFY_STOP;
118 local_irq_disable(); 53 local_irq_disable();
119 54
120 crash_save_this_cpu(regs, cpu); 55 crash_save_cpu(regs, cpu);
121 disable_local_APIC(); 56 disable_local_APIC();
122 atomic_dec(&waiting_for_crash_ipi); 57 atomic_dec(&waiting_for_crash_ipi);
123 /* Assume hlt works */ 58 /* Assume hlt works */
@@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs)
196 131
197 disable_IO_APIC(); 132 disable_IO_APIC();
198 133
199 crash_save_self(regs); 134 crash_save_cpu(regs, smp_processor_id());
200} 135}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index b3f0908668ec..6fe191c58084 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -54,13 +54,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
54 54
55 /* various gunk below that needed for SMP startup */ 55 /* various gunk below that needed for SMP startup */
56 if (addr < 0x8000) { 56 if (addr < 0x8000) {
57 *addrp = 0x8000; 57 *addrp = PAGE_ALIGN(0x8000);
58 return 1; 58 return 1;
59 } 59 }
60 60
61 /* direct mapping tables of the kernel */ 61 /* direct mapping tables of the kernel */
62 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { 62 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
63 *addrp = table_end << PAGE_SHIFT; 63 *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
64 return 1; 64 return 1;
65 } 65 }
66 66
@@ -68,18 +68,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
68#ifdef CONFIG_BLK_DEV_INITRD 68#ifdef CONFIG_BLK_DEV_INITRD
69 if (LOADER_TYPE && INITRD_START && last >= INITRD_START && 69 if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
70 addr < INITRD_START+INITRD_SIZE) { 70 addr < INITRD_START+INITRD_SIZE) {
71 *addrp = INITRD_START + INITRD_SIZE; 71 *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
72 return 1; 72 return 1;
73 } 73 }
74#endif 74#endif
75 /* kernel code */ 75 /* kernel code */
76 if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { 76 if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
77 *addrp = __pa_symbol(&_end); 77 *addrp = PAGE_ALIGN(__pa_symbol(&_end));
78 return 1; 78 return 1;
79 } 79 }
80 80
81 if (last >= ebda_addr && addr < ebda_addr + ebda_size) { 81 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
82 *addrp = ebda_addr + ebda_size; 82 *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
83 return 1; 83 return 1;
84 } 84 }
85 85
@@ -152,7 +152,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
152 continue; 152 continue;
153 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) 153 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
154 ; 154 ;
155 last = addr + size; 155 last = PAGE_ALIGN(addr) + size;
156 if (last > ei->addr + ei->size) 156 if (last > ei->addr + ei->size)
157 continue; 157 continue;
158 if (last > end) 158 if (last > end)
@@ -278,7 +278,7 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
278 >> PAGE_SHIFT; 278 >> PAGE_SHIFT;
279 279
280 /* Skip map entries smaller than a page */ 280 /* Skip map entries smaller than a page */
281 if (ei_startpfn > ei_endpfn) 281 if (ei_startpfn >= ei_endpfn)
282 continue; 282 continue;
283 283
284 /* Check if end_pfn_map should be updated */ 284 /* Check if end_pfn_map should be updated */
@@ -594,7 +594,9 @@ static int __init parse_memmap_opt(char *p)
594 * size before original memory map is 594 * size before original memory map is
595 * reset. 595 * reset.
596 */ 596 */
597 e820_register_active_regions(0, 0, -1UL);
597 saved_max_pfn = e820_end_of_ram(); 598 saved_max_pfn = e820_end_of_ram();
599 remove_all_active_ranges();
598#endif 600#endif
599 end_pfn_map = 0; 601 end_pfn_map = 0;
600 e820.nr_map = 0; 602 e820.nr_map = 0;
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 208e38a372c1..829698f6d049 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -45,7 +45,13 @@ static void nvidia_bugs(void)
45 /* 45 /*
46 * All timer overrides on Nvidia are 46 * All timer overrides on Nvidia are
47 * wrong unless HPET is enabled. 47 * wrong unless HPET is enabled.
48 * Unfortunately that's not true on many Asus boards.
49 * We don't know yet how to detect this automatically, but
50 * at least allow a command line override.
48 */ 51 */
52 if (acpi_use_timer_override)
53 return;
54
49 nvidia_hpet_detected = 0; 55 nvidia_hpet_detected = 0;
50 acpi_table_parse(ACPI_HPET, nvidia_hpet_check); 56 acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
51 if (nvidia_hpet_detected == 0) { 57 if (nvidia_hpet_detected == 0) {
@@ -53,6 +59,8 @@ static void nvidia_bugs(void)
53 printk(KERN_INFO "Nvidia board " 59 printk(KERN_INFO "Nvidia board "
54 "detected. Ignoring ACPI " 60 "detected. Ignoring ACPI "
55 "timer override.\n"); 61 "timer override.\n");
62 printk(KERN_INFO "If you got timer trouble "
63 "try acpi_use_timer_override\n");
56 } 64 }
57#endif 65#endif
58 /* RED-PEN skip them on mptables too? */ 66 /* RED-PEN skip them on mptables too? */
@@ -61,10 +69,18 @@ static void nvidia_bugs(void)
61 69
62static void ati_bugs(void) 70static void ati_bugs(void)
63{ 71{
64#if 1 /* for testing */ 72}
65 printk("ATI board detected\n"); 73
74static void intel_bugs(void)
75{
76 u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
77
78#ifdef CONFIG_SMP
79 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
80 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
81 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
82 quirk_intel_irqbalance();
66#endif 83#endif
67 /* No bugs right now */
68} 84}
69 85
70struct chipset { 86struct chipset {
@@ -76,6 +92,7 @@ static struct chipset early_qrk[] = {
76 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, 92 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
77 { PCI_VENDOR_ID_VIA, via_bugs }, 93 { PCI_VENDOR_ID_VIA, via_bugs },
78 { PCI_VENDOR_ID_ATI, ati_bugs }, 94 { PCI_VENDOR_ID_ATI, ati_bugs },
95 { PCI_VENDOR_ID_INTEL, intel_bugs},
79 {} 96 {}
80}; 97};
81 98
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index e22ecd54870d..47b6d90349da 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -224,7 +224,7 @@ static int __init setup_early_printk(char *buf)
224 return 0; 224 return 0;
225 early_console_initialized = 1; 225 early_console_initialized = 1;
226 226
227 if (!strcmp(buf,"keep")) 227 if (strstr(buf, "keep"))
228 keep_early = 1; 228 keep_early = 1;
229 229
230 if (!strncmp(buf, "serial", 6)) { 230 if (!strncmp(buf, "serial", 6)) {
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 38a7b2d528e2..601d332c4b79 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@ ENTRY(system_call)
230 CFI_REL_OFFSET rip,RIP-ARGOFFSET 230 CFI_REL_OFFSET rip,RIP-ARGOFFSET
231 GET_THREAD_INFO(%rcx) 231 GET_THREAD_INFO(%rcx)
232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
233 CFI_REMEMBER_STATE
234 jnz tracesys 233 jnz tracesys
235 cmpq $__NR_syscall_max,%rax 234 cmpq $__NR_syscall_max,%rax
236 ja badsys 235 ja badsys
@@ -241,7 +240,6 @@ ENTRY(system_call)
241 * Syscall return path ending with SYSRET (fast path) 240 * Syscall return path ending with SYSRET (fast path)
242 * Has incomplete stack frame and undefined top of stack. 241 * Has incomplete stack frame and undefined top of stack.
243 */ 242 */
244 .globl ret_from_sys_call
245ret_from_sys_call: 243ret_from_sys_call:
246 movl $_TIF_ALLWORK_MASK,%edi 244 movl $_TIF_ALLWORK_MASK,%edi
247 /* edi: flagmask */ 245 /* edi: flagmask */
@@ -251,8 +249,8 @@ sysret_check:
251 TRACE_IRQS_OFF 249 TRACE_IRQS_OFF
252 movl threadinfo_flags(%rcx),%edx 250 movl threadinfo_flags(%rcx),%edx
253 andl %edi,%edx 251 andl %edi,%edx
254 CFI_REMEMBER_STATE
255 jnz sysret_careful 252 jnz sysret_careful
253 CFI_REMEMBER_STATE
256 /* 254 /*
257 * sysretq will re-enable interrupts: 255 * sysretq will re-enable interrupts:
258 */ 256 */
@@ -265,10 +263,10 @@ sysret_check:
265 swapgs 263 swapgs
266 sysretq 264 sysretq
267 265
266 CFI_RESTORE_STATE
268 /* Handle reschedules */ 267 /* Handle reschedules */
269 /* edx: work, edi: workmask */ 268 /* edx: work, edi: workmask */
270sysret_careful: 269sysret_careful:
271 CFI_RESTORE_STATE
272 bt $TIF_NEED_RESCHED,%edx 270 bt $TIF_NEED_RESCHED,%edx
273 jnc sysret_signal 271 jnc sysret_signal
274 TRACE_IRQS_ON 272 TRACE_IRQS_ON
@@ -306,7 +304,6 @@ badsys:
306 304
307 /* Do syscall tracing */ 305 /* Do syscall tracing */
308tracesys: 306tracesys:
309 CFI_RESTORE_STATE
310 SAVE_REST 307 SAVE_REST
311 movq $-ENOSYS,RAX(%rsp) 308 movq $-ENOSYS,RAX(%rsp)
312 FIXUP_TOP_OF_STACK %rdi 309 FIXUP_TOP_OF_STACK %rdi
@@ -315,37 +312,20 @@ tracesys:
315 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 312 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
316 RESTORE_REST 313 RESTORE_REST
317 cmpq $__NR_syscall_max,%rax 314 cmpq $__NR_syscall_max,%rax
315 movq $-ENOSYS,%rcx
316 cmova %rcx,%rax
318 ja 1f 317 ja 1f
319 movq %r10,%rcx /* fixup for C */ 318 movq %r10,%rcx /* fixup for C */
320 call *sys_call_table(,%rax,8) 319 call *sys_call_table(,%rax,8)
3211: movq %rax,RAX-ARGOFFSET(%rsp) 3201: movq %rax,RAX-ARGOFFSET(%rsp)
322 /* Use IRET because user could have changed frame */ 321 /* Use IRET because user could have changed frame */
323 jmp int_ret_from_sys_call
324 CFI_ENDPROC
325END(system_call)
326 322
327/* 323/*
328 * Syscall return path ending with IRET. 324 * Syscall return path ending with IRET.
329 * Has correct top of stack, but partial stack frame. 325 * Has correct top of stack, but partial stack frame.
330 */ 326 */
331ENTRY(int_ret_from_sys_call) 327 .globl int_ret_from_sys_call
332 CFI_STARTPROC simple 328int_ret_from_sys_call:
333 CFI_SIGNAL_FRAME
334 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
335 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
336 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
337 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
338 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
339 CFI_REL_OFFSET rip,RIP-ARGOFFSET
340 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
341 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
342 CFI_REL_OFFSET rax,RAX-ARGOFFSET
343 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
344 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
345 CFI_REL_OFFSET r8,R8-ARGOFFSET
346 CFI_REL_OFFSET r9,R9-ARGOFFSET
347 CFI_REL_OFFSET r10,R10-ARGOFFSET
348 CFI_REL_OFFSET r11,R11-ARGOFFSET
349 cli 329 cli
350 TRACE_IRQS_OFF 330 TRACE_IRQS_OFF
351 testl $3,CS-ARGOFFSET(%rsp) 331 testl $3,CS-ARGOFFSET(%rsp)
@@ -392,8 +372,6 @@ int_very_careful:
392 popq %rdi 372 popq %rdi
393 CFI_ADJUST_CFA_OFFSET -8 373 CFI_ADJUST_CFA_OFFSET -8
394 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 374 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
395 cli
396 TRACE_IRQS_OFF
397 jmp int_restore_rest 375 jmp int_restore_rest
398 376
399int_signal: 377int_signal:
@@ -409,7 +387,7 @@ int_restore_rest:
409 TRACE_IRQS_OFF 387 TRACE_IRQS_OFF
410 jmp int_with_check 388 jmp int_with_check
411 CFI_ENDPROC 389 CFI_ENDPROC
412END(int_ret_from_sys_call) 390END(system_call)
413 391
414/* 392/*
415 * Certain special system calls that need to save a complete full stack frame. 393 * Certain special system calls that need to save a complete full stack frame.
@@ -535,8 +513,6 @@ END(stub_rt_sigreturn)
5351: incl %gs:pda_irqcount 5131: incl %gs:pda_irqcount
536 cmoveq %gs:pda_irqstackptr,%rsp 514 cmoveq %gs:pda_irqstackptr,%rsp
537 push %rbp # backlink for old unwinder 515 push %rbp # backlink for old unwinder
538 CFI_ADJUST_CFA_OFFSET 8
539 CFI_REL_OFFSET rbp,0
540 /* 516 /*
541 * We entered an interrupt context - irqs are off: 517 * We entered an interrupt context - irqs are off:
542 */ 518 */
@@ -980,11 +956,6 @@ ENTRY(kernel_thread)
980 call do_fork 956 call do_fork
981 movq %rax,RAX(%rsp) 957 movq %rax,RAX(%rsp)
982 xorl %edi,%edi 958 xorl %edi,%edi
983 test %rax,%rax
984 jnz 1f
985 /* terminate stack in child */
986 movq %rdi,RIP(%rsp)
9871:
988 959
989 /* 960 /*
990 * It isn't worth to check for reschedule here, 961 * It isn't worth to check for reschedule here,
@@ -1176,7 +1147,6 @@ ENTRY(call_softirq)
1176 incl %gs:pda_irqcount 1147 incl %gs:pda_irqcount
1177 cmove %gs:pda_irqstackptr,%rsp 1148 cmove %gs:pda_irqstackptr,%rsp
1178 push %rbp # backlink for old unwinder 1149 push %rbp # backlink for old unwinder
1179 CFI_ADJUST_CFA_OFFSET 8
1180 call __do_softirq 1150 call __do_softirq
1181 leaveq 1151 leaveq
1182 CFI_DEF_CFA_REGISTER rsp 1152 CFI_DEF_CFA_REGISTER rsp
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75d1866..b007433f96bb 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@ extern struct genapic apic_flat;
33extern struct genapic apic_physflat; 33extern struct genapic apic_physflat;
34 34
35struct genapic *genapic = &apic_flat; 35struct genapic *genapic = &apic_flat;
36 36struct genapic *genapic_force;
37 37
38/* 38/*
39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@ void __init clustered_apic_check(void)
46 u8 cluster_cnt[NUM_APIC_CLUSTERS]; 46 u8 cluster_cnt[NUM_APIC_CLUSTERS];
47 int max_apic = 0; 47 int max_apic = 0;
48 48
49 /* genapic selection can be forced because of certain quirks.
50 */
51 if (genapic_force) {
52 genapic = genapic_force;
53 goto print;
54 }
55
49#if defined(CONFIG_ACPI) 56#if defined(CONFIG_ACPI)
50 /* 57 /*
51 * Some x86_64 machines use physical APIC mode regardless of how many 58 * Some x86_64 machines use physical APIC mode regardless of how many
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 0dfc223c1839..7c01db8fa9d1 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -153,7 +153,7 @@ struct genapic apic_flat = {
153 153
154static cpumask_t physflat_target_cpus(void) 154static cpumask_t physflat_target_cpus(void)
155{ 155{
156 return cpumask_of_cpu(0); 156 return cpu_online_map;
157} 157}
158 158
159static cpumask_t physflat_vector_allocation_domain(int cpu) 159static cpumask_t physflat_vector_allocation_domain(int cpu)
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3c5b5c..cc230b93cd1c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
57{ 57{
58 int i; 58 int i;
59 59
60 for (i = 0; i < 256; i++) 60 /* clear bss before set_intr_gate with early_idt_handler */
61 clear_bss();
62
63 for (i = 0; i < IDT_ENTRIES; i++)
61 set_intr_gate(i, early_idt_handler); 64 set_intr_gate(i, early_idt_handler);
62 asm volatile("lidt %0" :: "m" (idt_descr)); 65 asm volatile("lidt %0" :: "m" (idt_descr));
63 clear_bss();
64 66
65 early_printk("Kernel alive\n"); 67 early_printk("Kernel alive\n");
66 68
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9bb781d..1d58c13bc6bc 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf)
82 struct task_struct *tsk = current; 82 struct task_struct *tsk = current;
83 int err = 0; 83 int err = 0;
84 84
85 { 85 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
86 extern void bad_user_i387_struct(void); 86 sizeof(tsk->thread.i387.fxsave));
87 if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
88 bad_user_i387_struct();
89 }
90 87
91 if ((unsigned long)buf % 16) 88 if ((unsigned long)buf % 16)
92 printk("save_i387: bad fpstate %p\n",buf); 89 printk("save_i387: bad fpstate %p\n",buf);
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 0612a33bb896..d73c79e821f1 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
78 78
79void (*interrupt[NR_IRQS])(void) = { 79/* for the irq vectors */
80static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
80 IRQLIST_16(0x2), IRQLIST_16(0x3), 81 IRQLIST_16(0x2), IRQLIST_16(0x3),
81 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), 82 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
82 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), 83 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
@@ -178,7 +179,8 @@ void make_8259A_irq(unsigned int irq)
178{ 179{
179 disable_irq_nosync(irq); 180 disable_irq_nosync(irq);
180 io_apic_irqs &= ~(1<<irq); 181 io_apic_irqs &= ~(1<<irq);
181 set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq); 182 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
183 "XT");
182 enable_irq(irq); 184 enable_irq(irq);
183} 185}
184 186
@@ -431,8 +433,8 @@ void __init init_ISA_irqs (void)
431 /* 433 /*
432 * 16 old-style INTA-cycle interrupts: 434 * 16 old-style INTA-cycle interrupts:
433 */ 435 */
434 set_irq_chip_and_handler(i, &i8259A_chip, 436 set_irq_chip_and_handler_name(i, &i8259A_chip,
435 handle_level_irq); 437 handle_level_irq, "XT");
436 } else { 438 } else {
437 /* 439 /*
438 * 'high' PCI IRQs filled in on demand 440 * 'high' PCI IRQs filled in on demand
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 44b55f833875..2a1dcd5f69c2 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,15 +55,11 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
55 55
56static int no_timer_check; 56static int no_timer_check;
57 57
58static int disable_timer_pin_1 __initdata;
59
60int timer_over_8254 __initdata = 0;
61
62/* Where if anywhere is the i8259 connect in external int mode */ 58/* Where if anywhere is the i8259 connect in external int mode */
63static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 59static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
64 60
65static DEFINE_SPINLOCK(ioapic_lock); 61static DEFINE_SPINLOCK(ioapic_lock);
66static DEFINE_SPINLOCK(vector_lock); 62DEFINE_SPINLOCK(vector_lock);
67 63
68/* 64/*
69 * # of IRQ routing registers 65 * # of IRQ routing registers
@@ -88,6 +84,52 @@ static struct irq_pin_list {
88 short apic, pin, next; 84 short apic, pin, next;
89} irq_2_pin[PIN_MAP_SIZE]; 85} irq_2_pin[PIN_MAP_SIZE];
90 86
87struct io_apic {
88 unsigned int index;
89 unsigned int unused[3];
90 unsigned int data;
91};
92
93static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
94{
95 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
96 + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
97}
98
99static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
100{
101 struct io_apic __iomem *io_apic = io_apic_base(apic);
102 writel(reg, &io_apic->index);
103 return readl(&io_apic->data);
104}
105
106static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
107{
108 struct io_apic __iomem *io_apic = io_apic_base(apic);
109 writel(reg, &io_apic->index);
110 writel(value, &io_apic->data);
111}
112
113/*
114 * Re-write a value: to be used for read-modify-write
115 * cycles where the read already set up the index register.
116 */
117static inline void io_apic_modify(unsigned int apic, unsigned int value)
118{
119 struct io_apic __iomem *io_apic = io_apic_base(apic);
120 writel(value, &io_apic->data);
121}
122
123/*
124 * Synchronize the IO-APIC and the CPU by doing
125 * a dummy read from the IO-APIC
126 */
127static inline void io_apic_sync(unsigned int apic)
128{
129 struct io_apic __iomem *io_apic = io_apic_base(apic);
130 readl(&io_apic->data);
131}
132
91#define __DO_ACTION(R, ACTION, FINAL) \ 133#define __DO_ACTION(R, ACTION, FINAL) \
92 \ 134 \
93{ \ 135{ \
@@ -126,11 +168,39 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
126 return eu.entry; 168 return eu.entry;
127} 169}
128 170
129static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 171/*
172 * When we write a new IO APIC routing entry, we need to write the high
173 * word first! If the mask bit in the low word is clear, we will enable
174 * the interrupt, and we need to make sure the entry is fully populated
175 * before that happens.
176 */
177static void
178__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
130{ 179{
131 unsigned long flags;
132 union entry_union eu; 180 union entry_union eu;
133 eu.entry = e; 181 eu.entry = e;
182 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
183 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
184}
185
186static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
187{
188 unsigned long flags;
189 spin_lock_irqsave(&ioapic_lock, flags);
190 __ioapic_write_entry(apic, pin, e);
191 spin_unlock_irqrestore(&ioapic_lock, flags);
192}
193
194/*
195 * When we mask an IO APIC routing entry, we need to write the low
196 * word first, in order to set the mask bit before we change the
197 * high bits!
198 */
199static void ioapic_mask_entry(int apic, int pin)
200{
201 unsigned long flags;
202 union entry_union eu = { .entry.mask = 1 };
203
134 spin_lock_irqsave(&ioapic_lock, flags); 204 spin_lock_irqsave(&ioapic_lock, flags);
135 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 205 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
136 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 206 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
@@ -256,9 +326,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
256 /* 326 /*
257 * Disable it in the IO-APIC irq-routing table: 327 * Disable it in the IO-APIC irq-routing table:
258 */ 328 */
259 memset(&entry, 0, sizeof(entry)); 329 ioapic_mask_entry(apic, pin);
260 entry.mask = 1;
261 ioapic_write_entry(apic, pin, entry);
262} 330}
263 331
264static void clear_IO_APIC (void) 332static void clear_IO_APIC (void)
@@ -282,29 +350,6 @@ static int __init disable_ioapic_setup(char *str)
282} 350}
283early_param("noapic", disable_ioapic_setup); 351early_param("noapic", disable_ioapic_setup);
284 352
285/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
286static int __init disable_timer_pin_setup(char *arg)
287{
288 disable_timer_pin_1 = 1;
289 return 1;
290}
291__setup("disable_timer_pin_1", disable_timer_pin_setup);
292
293static int __init setup_disable_8254_timer(char *s)
294{
295 timer_over_8254 = -1;
296 return 1;
297}
298static int __init setup_enable_8254_timer(char *s)
299{
300 timer_over_8254 = 2;
301 return 1;
302}
303
304__setup("disable_8254_timer", setup_disable_8254_timer);
305__setup("enable_8254_timer", setup_enable_8254_timer);
306
307
308/* 353/*
309 * Find the IRQ entry number of a certain pin. 354 * Find the IRQ entry number of a certain pin.
310 */ 355 */
@@ -612,15 +657,15 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
612 * Also, we've got to be careful not to trash gate 657 * Also, we've got to be careful not to trash gate
613 * 0x80, because int 0x80 is hm, kind of importantish. ;) 658 * 0x80, because int 0x80 is hm, kind of importantish. ;)
614 */ 659 */
615 static struct { 660 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
616 int vector;
617 int offset;
618 } pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
619 int old_vector = -1; 661 int old_vector = -1;
620 int cpu; 662 int cpu;
621 663
622 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); 664 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
623 665
666 /* Only try and allocate irqs on cpus that are present */
667 cpus_and(mask, mask, cpu_online_map);
668
624 if (irq_vector[irq] > 0) 669 if (irq_vector[irq] > 0)
625 old_vector = irq_vector[irq]; 670 old_vector = irq_vector[irq];
626 if (old_vector > 0) { 671 if (old_vector > 0) {
@@ -630,15 +675,15 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
630 } 675 }
631 676
632 for_each_cpu_mask(cpu, mask) { 677 for_each_cpu_mask(cpu, mask) {
633 cpumask_t domain; 678 cpumask_t domain, new_mask;
634 int first, new_cpu; 679 int new_cpu;
635 int vector, offset; 680 int vector, offset;
636 681
637 domain = vector_allocation_domain(cpu); 682 domain = vector_allocation_domain(cpu);
638 first = first_cpu(domain); 683 cpus_and(new_mask, domain, cpu_online_map);
639 684
640 vector = pos[first].vector; 685 vector = current_vector;
641 offset = pos[first].offset; 686 offset = current_offset;
642next: 687next:
643 vector += 8; 688 vector += 8;
644 if (vector >= FIRST_SYSTEM_VECTOR) { 689 if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -646,24 +691,24 @@ next:
646 offset = (offset + 1) % 8; 691 offset = (offset + 1) % 8;
647 vector = FIRST_DEVICE_VECTOR + offset; 692 vector = FIRST_DEVICE_VECTOR + offset;
648 } 693 }
649 if (unlikely(pos[first].vector == vector)) 694 if (unlikely(current_vector == vector))
650 continue; 695 continue;
651 if (vector == IA32_SYSCALL_VECTOR) 696 if (vector == IA32_SYSCALL_VECTOR)
652 goto next; 697 goto next;
653 for_each_cpu_mask(new_cpu, domain) 698 for_each_cpu_mask(new_cpu, new_mask)
654 if (per_cpu(vector_irq, cpu)[vector] != -1) 699 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
655 goto next; 700 goto next;
656 /* Found one! */ 701 /* Found one! */
657 for_each_cpu_mask(new_cpu, domain) { 702 current_vector = vector;
658 pos[cpu].vector = vector; 703 current_offset = offset;
659 pos[cpu].offset = offset;
660 }
661 if (old_vector >= 0) { 704 if (old_vector >= 0) {
705 cpumask_t old_mask;
662 int old_cpu; 706 int old_cpu;
663 for_each_cpu_mask(old_cpu, irq_domain[irq]) 707 cpus_and(old_mask, irq_domain[irq], cpu_online_map);
708 for_each_cpu_mask(old_cpu, old_mask)
664 per_cpu(vector_irq, old_cpu)[old_vector] = -1; 709 per_cpu(vector_irq, old_cpu)[old_vector] = -1;
665 } 710 }
666 for_each_cpu_mask(new_cpu, domain) 711 for_each_cpu_mask(new_cpu, new_mask)
667 per_cpu(vector_irq, new_cpu)[vector] = irq; 712 per_cpu(vector_irq, new_cpu)[vector] = irq;
668 irq_vector[irq] = vector; 713 irq_vector[irq] = vector;
669 irq_domain[irq] = domain; 714 irq_domain[irq] = domain;
@@ -684,6 +729,46 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
684 return vector; 729 return vector;
685} 730}
686 731
732static void __clear_irq_vector(int irq)
733{
734 cpumask_t mask;
735 int cpu, vector;
736
737 BUG_ON(!irq_vector[irq]);
738
739 vector = irq_vector[irq];
740 cpus_and(mask, irq_domain[irq], cpu_online_map);
741 for_each_cpu_mask(cpu, mask)
742 per_cpu(vector_irq, cpu)[vector] = -1;
743
744 irq_vector[irq] = 0;
745 irq_domain[irq] = CPU_MASK_NONE;
746}
747
748void __setup_vector_irq(int cpu)
749{
750 /* Initialize vector_irq on a new cpu */
751 /* This function must be called with vector_lock held */
752 int irq, vector;
753
754 /* Mark the inuse vectors */
755 for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
756 if (!cpu_isset(cpu, irq_domain[irq]))
757 continue;
758 vector = irq_vector[irq];
759 per_cpu(vector_irq, cpu)[vector] = irq;
760 }
761 /* Mark the free vectors */
762 for (vector = 0; vector < NR_VECTORS; ++vector) {
763 irq = per_cpu(vector_irq, cpu)[vector];
764 if (irq < 0)
765 continue;
766 if (!cpu_isset(cpu, irq_domain[irq]))
767 per_cpu(vector_irq, cpu)[vector] = -1;
768 }
769}
770
771
687extern void (*interrupt[NR_IRQS])(void); 772extern void (*interrupt[NR_IRQS])(void);
688 773
689static struct irq_chip ioapic_chip; 774static struct irq_chip ioapic_chip;
@@ -696,33 +781,73 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
696{ 781{
697 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 782 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
698 trigger == IOAPIC_LEVEL) 783 trigger == IOAPIC_LEVEL)
699 set_irq_chip_and_handler(irq, &ioapic_chip, 784 set_irq_chip_and_handler_name(irq, &ioapic_chip,
700 handle_fasteoi_irq); 785 handle_fasteoi_irq, "fasteoi");
701 else 786 else {
702 set_irq_chip_and_handler(irq, &ioapic_chip, 787 irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
703 handle_edge_irq); 788 set_irq_chip_and_handler_name(irq, &ioapic_chip,
789 handle_edge_irq, "edge");
790 }
704} 791}
705 792static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
706static void __init setup_IO_APIC_irqs(void)
707{ 793{
708 struct IO_APIC_route_entry entry; 794 struct IO_APIC_route_entry entry;
709 int apic, pin, idx, irq, first_notcon = 1, vector; 795 int vector;
710 unsigned long flags; 796 unsigned long flags;
711 797
712 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
713 798
714 for (apic = 0; apic < nr_ioapics; apic++) { 799 /*
715 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 800 * add it to the IO-APIC irq-routing table:
801 */
802 memset(&entry,0,sizeof(entry));
716 803
717 /* 804 entry.delivery_mode = INT_DELIVERY_MODE;
718 * add it to the IO-APIC irq-routing table: 805 entry.dest_mode = INT_DEST_MODE;
719 */ 806 entry.mask = 0; /* enable IRQ */
720 memset(&entry,0,sizeof(entry)); 807 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
808
809 entry.trigger = irq_trigger(idx);
810 entry.polarity = irq_polarity(idx);
721 811
722 entry.delivery_mode = INT_DELIVERY_MODE; 812 if (irq_trigger(idx)) {
723 entry.dest_mode = INT_DEST_MODE; 813 entry.trigger = 1;
724 entry.mask = 0; /* enable IRQ */ 814 entry.mask = 1;
725 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 815 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
816 }
817
818 if (!apic && !IO_APIC_IRQ(irq))
819 return;
820
821 if (IO_APIC_IRQ(irq)) {
822 cpumask_t mask;
823 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
824 if (vector < 0)
825 return;
826
827 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
828 entry.vector = vector;
829
830 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
831 if (!apic && (irq < 16))
832 disable_8259A_irq(irq);
833 }
834
835 ioapic_write_entry(apic, pin, entry);
836
837 spin_lock_irqsave(&ioapic_lock, flags);
838 set_native_irq_info(irq, TARGET_CPUS);
839 spin_unlock_irqrestore(&ioapic_lock, flags);
840
841}
842
843static void __init setup_IO_APIC_irqs(void)
844{
845 int apic, pin, idx, irq, first_notcon = 1;
846
847 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
848
849 for (apic = 0; apic < nr_ioapics; apic++) {
850 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
726 851
727 idx = find_irq_entry(apic,pin,mp_INT); 852 idx = find_irq_entry(apic,pin,mp_INT);
728 if (idx == -1) { 853 if (idx == -1) {
@@ -734,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void)
734 continue; 859 continue;
735 } 860 }
736 861
737 entry.trigger = irq_trigger(idx);
738 entry.polarity = irq_polarity(idx);
739
740 if (irq_trigger(idx)) {
741 entry.trigger = 1;
742 entry.mask = 1;
743 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
744 }
745
746 irq = pin_2_irq(idx, apic, pin); 862 irq = pin_2_irq(idx, apic, pin);
747 add_pin_to_irq(irq, apic, pin); 863 add_pin_to_irq(irq, apic, pin);
748 864
749 if (!apic && !IO_APIC_IRQ(irq)) 865 setup_IO_APIC_irq(apic, pin, idx, irq);
750 continue;
751
752 if (IO_APIC_IRQ(irq)) {
753 cpumask_t mask;
754 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
755 if (vector < 0)
756 continue;
757
758 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
759 entry.vector = vector;
760
761 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
762 if (!apic && (irq < 16))
763 disable_8259A_irq(irq);
764 }
765 ioapic_write_entry(apic, pin, entry);
766 866
767 spin_lock_irqsave(&ioapic_lock, flags);
768 set_native_irq_info(irq, TARGET_CPUS);
769 spin_unlock_irqrestore(&ioapic_lock, flags);
770 } 867 }
771 } 868 }
772 869
@@ -806,7 +903,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
806 * The timer IRQ doesn't have to know that behind the 903 * The timer IRQ doesn't have to know that behind the
807 * scene we have a 8259A-master in AEOI mode ... 904 * scene we have a 8259A-master in AEOI mode ...
808 */ 905 */
809 set_irq_chip_and_handler(0, &ioapic_chip, handle_edge_irq); 906 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
810 907
811 /* 908 /*
812 * Add it to the IO-APIC irq-routing table: 909 * Add it to the IO-APIC irq-routing table:
@@ -1255,12 +1352,15 @@ static int ioapic_retrigger_irq(unsigned int irq)
1255{ 1352{
1256 cpumask_t mask; 1353 cpumask_t mask;
1257 unsigned vector; 1354 unsigned vector;
1355 unsigned long flags;
1258 1356
1357 spin_lock_irqsave(&vector_lock, flags);
1259 vector = irq_vector[irq]; 1358 vector = irq_vector[irq];
1260 cpus_clear(mask); 1359 cpus_clear(mask);
1261 cpu_set(vector >> 8, mask); 1360 cpu_set(first_cpu(irq_domain[irq]), mask);
1262 1361
1263 send_IPI_mask(mask, vector & 0xff); 1362 send_IPI_mask(mask, vector);
1363 spin_unlock_irqrestore(&vector_lock, flags);
1264 1364
1265 return 1; 1365 return 1;
1266} 1366}
@@ -1468,10 +1568,33 @@ static inline void unlock_ExtINT_logic(void)
1468 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 1568 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1469 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 1569 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1470 * fanatically on his truly buggy board. 1570 * fanatically on his truly buggy board.
1471 *
1472 * FIXME: really need to revamp this for modern platforms only.
1473 */ 1571 */
1474static inline void check_timer(void) 1572
1573static int try_apic_pin(int apic, int pin, char *msg)
1574{
1575 apic_printk(APIC_VERBOSE, KERN_INFO
1576 "..TIMER: trying IO-APIC=%d PIN=%d %s",
1577 apic, pin, msg);
1578
1579 /*
1580 * Ok, does IRQ0 through the IOAPIC work?
1581 */
1582 if (!no_timer_check && timer_irq_works()) {
1583 nmi_watchdog_default();
1584 if (nmi_watchdog == NMI_IO_APIC) {
1585 disable_8259A_irq(0);
1586 setup_nmi();
1587 enable_8259A_irq(0);
1588 }
1589 return 1;
1590 }
1591 clear_IO_APIC_pin(apic, pin);
1592 apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
1593 return 0;
1594}
1595
1596/* The function from hell */
1597static void check_timer(void)
1475{ 1598{
1476 int apic1, pin1, apic2, pin2; 1599 int apic1, pin1, apic2, pin2;
1477 int vector; 1600 int vector;
@@ -1492,61 +1615,43 @@ static inline void check_timer(void)
1492 */ 1615 */
1493 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1616 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1494 init_8259A(1); 1617 init_8259A(1);
1495 if (timer_over_8254 > 0)
1496 enable_8259A_irq(0);
1497 1618
1498 pin1 = find_isa_irq_pin(0, mp_INT); 1619 pin1 = find_isa_irq_pin(0, mp_INT);
1499 apic1 = find_isa_irq_apic(0, mp_INT); 1620 apic1 = find_isa_irq_apic(0, mp_INT);
1500 pin2 = ioapic_i8259.pin; 1621 pin2 = ioapic_i8259.pin;
1501 apic2 = ioapic_i8259.apic; 1622 apic2 = ioapic_i8259.apic;
1502 1623
1503 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1624 /* Do this first, otherwise we get double interrupts on ATI boards */
1504 vector, apic1, pin1, apic2, pin2); 1625 if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled"))
1626 return;
1505 1627
1506 if (pin1 != -1) { 1628 /* Now try again with IRQ0 8259A enabled.
1507 /* 1629 Assumes timer is on IO-APIC 0 ?!? */
1508 * Ok, does IRQ0 through the IOAPIC work? 1630 enable_8259A_irq(0);
1509 */ 1631 unmask_IO_APIC_irq(0);
1510 unmask_IO_APIC_irq(0); 1632 if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
1511 if (!no_timer_check && timer_irq_works()) { 1633 return;
1512 nmi_watchdog_default(); 1634 disable_8259A_irq(0);
1513 if (nmi_watchdog == NMI_IO_APIC) {
1514 disable_8259A_irq(0);
1515 setup_nmi();
1516 enable_8259A_irq(0);
1517 }
1518 if (disable_timer_pin_1 > 0)
1519 clear_IO_APIC_pin(0, pin1);
1520 return;
1521 }
1522 clear_IO_APIC_pin(apic1, pin1);
1523 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
1524 "connected to IO-APIC\n");
1525 }
1526 1635
1527 apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " 1636 /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
1528 "through the 8259A ... "); 1637 on Nvidia boards */
1638 if (!(apic1 == 0 && pin1 == 0) &&
1639 try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
1640 return;
1641 if (!(apic1 == 0 && pin1 == 2) &&
1642 try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
1643 return;
1644
1645 /* Then try pure 8259A routing on the 8259 as reported by BIOS*/
1646 enable_8259A_irq(0);
1529 if (pin2 != -1) { 1647 if (pin2 != -1) {
1530 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
1531 apic2, pin2);
1532 /*
1533 * legacy devices should be connected to IO APIC #0
1534 */
1535 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 1648 setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1536 if (timer_irq_works()) { 1649 if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS"))
1537 apic_printk(APIC_VERBOSE," works.\n");
1538 nmi_watchdog_default();
1539 if (nmi_watchdog == NMI_IO_APIC) {
1540 setup_nmi();
1541 }
1542 return; 1650 return;
1543 }
1544 /*
1545 * Cleanup, just in case ...
1546 */
1547 clear_IO_APIC_pin(apic2, pin2);
1548 } 1651 }
1549 apic_printk(APIC_VERBOSE," failed.\n"); 1652
1653 /* Tried all possibilities to go through the IO-APIC. Now come the
1654 really cheesy fallbacks. */
1550 1655
1551 if (nmi_watchdog == NMI_IO_APIC) { 1656 if (nmi_watchdog == NMI_IO_APIC) {
1552 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1657 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
@@ -1742,7 +1847,7 @@ void destroy_irq(unsigned int irq)
1742 dynamic_irq_cleanup(irq); 1847 dynamic_irq_cleanup(irq);
1743 1848
1744 spin_lock_irqsave(&vector_lock, flags); 1849 spin_lock_irqsave(&vector_lock, flags);
1745 irq_vector[irq] = 0; 1850 __clear_irq_vector(irq);
1746 spin_unlock_irqrestore(&vector_lock, flags); 1851 spin_unlock_irqrestore(&vector_lock, flags);
1747} 1852}
1748 1853
@@ -1839,7 +1944,7 @@ int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1839 1944
1840 write_msi_msg(irq, &msg); 1945 write_msi_msg(irq, &msg);
1841 1946
1842 set_irq_chip_and_handler(irq, &msi_chip, handle_edge_irq); 1947 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
1843 1948
1844 return 0; 1949 return 0;
1845} 1950}
@@ -1860,18 +1965,16 @@ void arch_teardown_msi_irq(unsigned int irq)
1860 1965
1861static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) 1966static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
1862{ 1967{
1863 u32 low, high; 1968 struct ht_irq_msg msg;
1864 low = read_ht_irq_low(irq); 1969 fetch_ht_irq_msg(irq, &msg);
1865 high = read_ht_irq_high(irq);
1866 1970
1867 low &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); 1971 msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
1868 high &= ~(HT_IRQ_HIGH_DEST_ID_MASK); 1972 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
1869 1973
1870 low |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); 1974 msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
1871 high |= HT_IRQ_HIGH_DEST_ID(dest); 1975 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
1872 1976
1873 write_ht_irq_low(irq, low); 1977 write_ht_irq_msg(irq, &msg);
1874 write_ht_irq_high(irq, high);
1875} 1978}
1876 1979
1877static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 1980static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
@@ -1892,7 +1995,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1892 1995
1893 dest = cpu_mask_to_apicid(tmp); 1996 dest = cpu_mask_to_apicid(tmp);
1894 1997
1895 target_ht_irq(irq, dest, vector & 0xff); 1998 target_ht_irq(irq, dest, vector);
1896 set_native_irq_info(irq, mask); 1999 set_native_irq_info(irq, mask);
1897} 2000}
1898#endif 2001#endif
@@ -1915,14 +2018,15 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1915 2018
1916 vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); 2019 vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1917 if (vector >= 0) { 2020 if (vector >= 0) {
1918 u32 low, high; 2021 struct ht_irq_msg msg;
1919 unsigned dest; 2022 unsigned dest;
1920 2023
1921 dest = cpu_mask_to_apicid(tmp); 2024 dest = cpu_mask_to_apicid(tmp);
1922 2025
1923 high = HT_IRQ_HIGH_DEST_ID(dest); 2026 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1924 2027
1925 low = HT_IRQ_LOW_BASE | 2028 msg.address_lo =
2029 HT_IRQ_LOW_BASE |
1926 HT_IRQ_LOW_DEST_ID(dest) | 2030 HT_IRQ_LOW_DEST_ID(dest) |
1927 HT_IRQ_LOW_VECTOR(vector) | 2031 HT_IRQ_LOW_VECTOR(vector) |
1928 ((INT_DEST_MODE == 0) ? 2032 ((INT_DEST_MODE == 0) ?
@@ -1931,12 +2035,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1931 HT_IRQ_LOW_RQEOI_EDGE | 2035 HT_IRQ_LOW_RQEOI_EDGE |
1932 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 2036 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1933 HT_IRQ_LOW_MT_FIXED : 2037 HT_IRQ_LOW_MT_FIXED :
1934 HT_IRQ_LOW_MT_ARBITRATED); 2038 HT_IRQ_LOW_MT_ARBITRATED) |
2039 HT_IRQ_LOW_IRQ_MASKED;
1935 2040
1936 write_ht_irq_low(irq, low); 2041 write_ht_irq_msg(irq, &msg);
1937 write_ht_irq_high(irq, high);
1938 2042
1939 set_irq_chip_and_handler(irq, &ht_irq_chip, handle_edge_irq); 2043 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2044 handle_edge_irq, "edge");
1940 } 2045 }
1941 return vector; 2046 return vector;
1942} 2047}
@@ -2044,7 +2149,15 @@ void __init setup_ioapic_dest(void)
2044 if (irq_entry == -1) 2149 if (irq_entry == -1)
2045 continue; 2150 continue;
2046 irq = pin_2_irq(irq_entry, ioapic, pin); 2151 irq = pin_2_irq(irq_entry, ioapic, pin);
2047 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2152
2153 /* setup_IO_APIC_irqs could fail to get vector for some device
2154 * when you have too many devices, because at that time only boot
2155 * cpu is online.
2156 */
2157 if(!irq_vector[irq])
2158 setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
2159 else
2160 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2048 } 2161 }
2049 2162
2050 } 2163 }
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index dff68eb2b787..0c06af6c13bc 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -75,7 +75,7 @@ int show_interrupts(struct seq_file *p, void *v)
75 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); 75 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
76#endif 76#endif
77 seq_printf(p, " %8s", irq_desc[i].chip->name); 77 seq_printf(p, " %8s", irq_desc[i].chip->name);
78 seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); 78 seq_printf(p, "-%-8s", irq_desc[i].name);
79 79
80 seq_printf(p, " %s", action->name); 80 seq_printf(p, " %s", action->name);
81 for (action=action->next; action; action = action->next) 81 for (action=action->next; action; action = action->next)
@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
120 120
121 if (likely(irq < NR_IRQS)) 121 if (likely(irq < NR_IRQS))
122 generic_handle_irq(irq); 122 generic_handle_irq(irq);
123 else 123 else if (printk_ratelimit())
124 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", 124 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
125 __func__, smp_processor_id(), vector); 125 __func__, smp_processor_id(), vector);
126 126
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index ac241567e682..209c8c0bec71 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
224void __kprobes arch_remove_kprobe(struct kprobe *p) 224void __kprobes arch_remove_kprobe(struct kprobe *p)
225{ 225{
226 mutex_lock(&kprobe_mutex); 226 mutex_lock(&kprobe_mutex);
227 free_insn_slot(p->ainsn.insn); 227 free_insn_slot(p->ainsn.insn, 0);
228 mutex_unlock(&kprobe_mutex); 228 mutex_unlock(&kprobe_mutex);
229} 229}
230 230
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bbea88801d88..ac085038af29 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
306 */ 306 */
307 307
308static int check_interval = 5 * 60; /* 5 minutes */ 308static int check_interval = 5 * 60; /* 5 minutes */
309static void mcheck_timer(void *data); 309static void mcheck_timer(struct work_struct *work);
310static DECLARE_WORK(mcheck_work, mcheck_timer, NULL); 310static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
311 311
312static void mcheck_check_cpu(void *info) 312static void mcheck_check_cpu(void *info)
313{ 313{
@@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info)
315 do_machine_check(NULL, 0); 315 do_machine_check(NULL, 0);
316} 316}
317 317
318static void mcheck_timer(void *data) 318static void mcheck_timer(struct work_struct *work)
319{ 319{
320 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 320 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
321 schedule_delayed_work(&mcheck_work, check_interval * HZ); 321 schedule_delayed_work(&mcheck_work, check_interval * HZ);
@@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu)
641 return err; 641 return err;
642} 642}
643 643
644#ifdef CONFIG_HOTPLUG_CPU
645static void mce_remove_device(unsigned int cpu) 644static void mce_remove_device(unsigned int cpu)
646{ 645{
647 int i; 646 int i;
@@ -652,6 +651,7 @@ static void mce_remove_device(unsigned int cpu)
652 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); 651 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
653 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); 652 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
654 sysdev_unregister(&per_cpu(device_mce,cpu)); 653 sysdev_unregister(&per_cpu(device_mce,cpu));
654 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
655} 655}
656 656
657/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 657/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -674,7 +674,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
674static struct notifier_block mce_cpu_notifier = { 674static struct notifier_block mce_cpu_notifier = {
675 .notifier_call = mce_cpu_callback, 675 .notifier_call = mce_cpu_callback,
676}; 676};
677#endif
678 677
679static __init int mce_init_device(void) 678static __init int mce_init_device(void)
680{ 679{
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index 883fe747f64c..fa09debad4b7 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -551,7 +551,6 @@ out:
551 return err; 551 return err;
552} 552}
553 553
554#ifdef CONFIG_HOTPLUG_CPU
555/* 554/*
556 * let's be hotplug friendly. 555 * let's be hotplug friendly.
557 * in case of multiple core processors, the first core always takes ownership 556 * in case of multiple core processors, the first core always takes ownership
@@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
594 593
595 sprintf(name, "threshold_bank%i", bank); 594 sprintf(name, "threshold_bank%i", bank);
596 595
596#ifdef CONFIG_SMP
597 /* sibling symlink */ 597 /* sibling symlink */
598 if (shared_bank[bank] && b->blocks->cpu != cpu) { 598 if (shared_bank[bank] && b->blocks->cpu != cpu) {
599 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); 599 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
600 per_cpu(threshold_banks, cpu)[bank] = NULL; 600 per_cpu(threshold_banks, cpu)[bank] = NULL;
601 return; 601 return;
602 } 602 }
603#endif
603 604
604 /* remove all sibling symlinks before unregistering */ 605 /* remove all sibling symlinks before unregistering */
605 for_each_cpu_mask(i, b->cpus) { 606 for_each_cpu_mask(i, b->cpus) {
@@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
656static struct notifier_block threshold_cpu_notifier = { 657static struct notifier_block threshold_cpu_notifier = {
657 .notifier_call = threshold_cpu_callback, 658 .notifier_call = threshold_cpu_callback,
658}; 659};
659#endif /* CONFIG_HOTPLUG_CPU */
660 660
661static __init int threshold_init_device(void) 661static __init int threshold_init_device(void)
662{ 662{
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index 9d0958ff547f..a888e67f5874 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -23,6 +23,7 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/bug.h>
26 27
27#include <asm/system.h> 28#include <asm/system.h>
28#include <asm/page.h> 29#include <asm/page.h>
@@ -173,10 +174,12 @@ int module_finalize(const Elf_Ehdr *hdr,
173 lseg, lseg + locks->sh_size, 174 lseg, lseg + locks->sh_size,
174 tseg, tseg + text->sh_size); 175 tseg, tseg + text->sh_size);
175 } 176 }
176 return 0; 177
178 return module_bug_finalize(hdr, sechdrs, me);
177} 179}
178 180
179void module_arch_cleanup(struct module *mod) 181void module_arch_cleanup(struct module *mod)
180{ 182{
181 alternatives_smp_module_del(mod); 183 alternatives_smp_module_del(mod);
184 module_bug_cleanup(mod);
182} 185}
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab19fbd4..08072568847d 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
35int smp_found_config; 35int smp_found_config;
36unsigned int __initdata maxcpus = NR_CPUS; 36unsigned int __initdata maxcpus = NR_CPUS;
37 37
38int acpi_found_madt;
39
40/* 38/*
41 * Various Linux-internal data structures created from the 39 * Various Linux-internal data structures created from the
42 * MP-table. 40 * MP-table.
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..186aebbae32d 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
12 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 12 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
13 */ 13 */
14 14
15#include <linux/nmi.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/interrupt.h> 18#include <linux/interrupt.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysdev.h> 20#include <linux/sysdev.h>
20#include <linux/nmi.h>
21#include <linux/sysctl.h> 21#include <linux/sysctl.h>
22#include <linux/kprobes.h> 22#include <linux/kprobes.h>
23#include <linux/cpumask.h>
23 24
24#include <asm/smp.h> 25#include <asm/smp.h>
25#include <asm/nmi.h> 26#include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
41static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); 42static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
42static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); 43static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
43 44
45static cpumask_t backtrace_mask = CPU_MASK_NONE;
46
44/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 47/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 48 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
46 */ 49 */
@@ -190,6 +193,8 @@ void nmi_watchdog_default(void)
190 nmi_watchdog = NMI_IO_APIC; 193 nmi_watchdog = NMI_IO_APIC;
191} 194}
192 195
196static int endflag __initdata = 0;
197
193#ifdef CONFIG_SMP 198#ifdef CONFIG_SMP
194/* The performance counters used by NMI_LOCAL_APIC don't trigger when 199/* The performance counters used by NMI_LOCAL_APIC don't trigger when
195 * the CPU is idle. To make sure the NMI watchdog really ticks on all 200 * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -197,7 +202,6 @@ void nmi_watchdog_default(void)
197 */ 202 */
198static __init void nmi_cpu_busy(void *data) 203static __init void nmi_cpu_busy(void *data)
199{ 204{
200 volatile int *endflag = data;
201 local_irq_enable_in_hardirq(); 205 local_irq_enable_in_hardirq();
202 /* Intentionally don't use cpu_relax here. This is 206 /* Intentionally don't use cpu_relax here. This is
203 to make sure that the performance counter really ticks, 207 to make sure that the performance counter really ticks,
@@ -205,14 +209,13 @@ static __init void nmi_cpu_busy(void *data)
205 pause instruction. On a real HT machine this is fine because 209 pause instruction. On a real HT machine this is fine because
206 all other CPUs are busy with "useless" delay loops and don't 210 all other CPUs are busy with "useless" delay loops and don't
207 care if they get somewhat less cycles. */ 211 care if they get somewhat less cycles. */
208 while (*endflag == 0) 212 while (endflag == 0)
209 barrier(); 213 mb();
210} 214}
211#endif 215#endif
212 216
213int __init check_nmi_watchdog (void) 217int __init check_nmi_watchdog (void)
214{ 218{
215 volatile int endflag = 0;
216 int *counts; 219 int *counts;
217 int cpu; 220 int cpu;
218 221
@@ -253,6 +256,7 @@ int __init check_nmi_watchdog (void)
253 if (!atomic_read(&nmi_active)) { 256 if (!atomic_read(&nmi_active)) {
254 kfree(counts); 257 kfree(counts);
255 atomic_set(&nmi_active, -1); 258 atomic_set(&nmi_active, -1);
259 endflag = 1;
256 return -1; 260 return -1;
257 } 261 }
258 endflag = 1; 262 endflag = 1;
@@ -782,6 +786,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
782{ 786{
783 int sum; 787 int sum;
784 int touched = 0; 788 int touched = 0;
789 int cpu = smp_processor_id();
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 790 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 dummy; 791 u64 dummy;
787 int rc=0; 792 int rc=0;
@@ -799,6 +804,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
799 touched = 1; 804 touched = 1;
800 } 805 }
801 806
807 if (cpu_isset(cpu, backtrace_mask)) {
808 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
809
810 spin_lock(&lock);
811 printk("NMI backtrace for cpu %d\n", cpu);
812 dump_stack();
813 spin_unlock(&lock);
814 cpu_clear(cpu, backtrace_mask);
815 }
816
802#ifdef CONFIG_X86_MCE 817#ifdef CONFIG_X86_MCE
803 /* Could check oops_in_progress here too, but it's safer 818 /* Could check oops_in_progress here too, but it's safer
804 not too */ 819 not too */
@@ -931,6 +946,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
931 946
932#endif 947#endif
933 948
949void __trigger_all_cpu_backtrace(void)
950{
951 int i;
952
953 backtrace_mask = cpu_online_map;
954 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
955 for (i = 0; i < 10 * 1000; i++) {
956 if (cpus_empty(backtrace_mask))
957 break;
958 mdelay(1);
959 }
960}
961
934EXPORT_SYMBOL(nmi_active); 962EXPORT_SYMBOL(nmi_active);
935EXPORT_SYMBOL(nmi_watchdog); 963EXPORT_SYMBOL(nmi_watchdog);
936EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 964EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index b3296cc2f2f2..3215675ab128 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,13 @@
41#include <asm/pci-direct.h> 41#include <asm/pci-direct.h>
42#include <asm/system.h> 42#include <asm/system.h>
43#include <asm/dma.h> 43#include <asm/dma.h>
44#include <asm/rio.h>
45
46#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
47int use_calgary __read_mostly = 1;
48#else
49int use_calgary __read_mostly = 0;
50#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
44 51
45#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 52#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
46#define PCI_VENDOR_DEVICE_ID_CALGARY \ 53#define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -52,7 +59,8 @@
52#define ONE_BASED_CHASSIS_NUM 1 59#define ONE_BASED_CHASSIS_NUM 1
53 60
54/* register offsets inside the host bridge space */ 61/* register offsets inside the host bridge space */
55#define PHB_CSR_OFFSET 0x0110 62#define CALGARY_CONFIG_REG 0x0108
63#define PHB_CSR_OFFSET 0x0110 /* Channel Status */
56#define PHB_PLSSR_OFFSET 0x0120 64#define PHB_PLSSR_OFFSET 0x0120
57#define PHB_CONFIG_RW_OFFSET 0x0160 65#define PHB_CONFIG_RW_OFFSET 0x0160
58#define PHB_IOBASE_BAR_LOW 0x0170 66#define PHB_IOBASE_BAR_LOW 0x0170
@@ -83,6 +91,8 @@
83#define TAR_VALID 0x0000000000000008UL 91#define TAR_VALID 0x0000000000000008UL
84/* CSR (Channel/DMA Status Register) */ 92/* CSR (Channel/DMA Status Register) */
85#define CSR_AGENT_MASK 0xffe0ffff 93#define CSR_AGENT_MASK 0xffe0ffff
94/* CCR (Calgary Configuration Register) */
95#define CCR_2SEC_TIMEOUT 0x000000000000000EUL
86 96
87#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ 97#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
88#define MAX_NUM_CHASSIS 8 /* max number of chassis */ 98#define MAX_NUM_CHASSIS 8 /* max number of chassis */
@@ -112,14 +122,35 @@ static const unsigned long phb_offsets[] = {
112 0xB000 /* PHB3 */ 122 0xB000 /* PHB3 */
113}; 123};
114 124
125/* PHB debug registers */
126
127static const unsigned long phb_debug_offsets[] = {
128 0x4000 /* PHB 0 DEBUG */,
129 0x5000 /* PHB 1 DEBUG */,
130 0x6000 /* PHB 2 DEBUG */,
131 0x7000 /* PHB 3 DEBUG */
132};
133
134/*
135 * STUFF register for each debug PHB,
136 * byte 1 = start bus number, byte 2 = end bus number
137 */
138
139#define PHB_DEBUG_STUFF_OFFSET 0x0020
140
115unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; 141unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
116static int translate_empty_slots __read_mostly = 0; 142static int translate_empty_slots __read_mostly = 0;
117static int calgary_detected __read_mostly = 0; 143static int calgary_detected __read_mostly = 0;
118 144
145static struct rio_table_hdr *rio_table_hdr __initdata;
146static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
147static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata;
148
119struct calgary_bus_info { 149struct calgary_bus_info {
120 void *tce_space; 150 void *tce_space;
121 unsigned char translation_disabled; 151 unsigned char translation_disabled;
122 signed char phbid; 152 signed char phbid;
153 void __iomem *bbar;
123}; 154};
124 155
125static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; 156static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -472,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = {
472 .unmap_sg = calgary_unmap_sg, 503 .unmap_sg = calgary_unmap_sg,
473}; 504};
474 505
506static inline void __iomem * busno_to_bbar(unsigned char num)
507{
508 return bus_info[num].bbar;
509}
510
475static inline int busno_to_phbid(unsigned char num) 511static inline int busno_to_phbid(unsigned char num)
476{ 512{
477 return bus_info[num].phbid; 513 return bus_info[num].phbid;
@@ -617,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
617static void __init calgary_reserve_regions(struct pci_dev *dev) 653static void __init calgary_reserve_regions(struct pci_dev *dev)
618{ 654{
619 unsigned int npages; 655 unsigned int npages;
620 void __iomem *bbar;
621 unsigned char busnum;
622 u64 start; 656 u64 start;
623 struct iommu_table *tbl = dev->sysdata; 657 struct iommu_table *tbl = dev->sysdata;
624 658
625 bbar = tbl->bbar;
626 busnum = dev->bus->number;
627
628 /* reserve bad_dma_address in case it's a legal address */ 659 /* reserve bad_dma_address in case it's a legal address */
629 iommu_range_reserve(tbl, bad_dma_address, 1); 660 iommu_range_reserve(tbl, bad_dma_address, 1);
630 661
@@ -732,6 +763,38 @@ static void calgary_watchdog(unsigned long data)
732 } 763 }
733} 764}
734 765
766static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
767 unsigned char busnum)
768{
769 u64 val64;
770 void __iomem *target;
771 unsigned int phb_shift = ~0; /* silence gcc */
772 u64 mask;
773
774 switch (busno_to_phbid(busnum)) {
775 case 0: phb_shift = (63 - 19);
776 break;
777 case 1: phb_shift = (63 - 23);
778 break;
779 case 2: phb_shift = (63 - 27);
780 break;
781 case 3: phb_shift = (63 - 35);
782 break;
783 default:
784 BUG_ON(busno_to_phbid(busnum));
785 }
786
787 target = calgary_reg(bbar, CALGARY_CONFIG_REG);
788 val64 = be64_to_cpu(readq(target));
789
790 /* zero out this PHB's timer bits */
791 mask = ~(0xFUL << phb_shift);
792 val64 &= mask;
793 val64 |= (CCR_2SEC_TIMEOUT << phb_shift);
794 writeq(cpu_to_be64(val64), target);
795 readq(target); /* flush */
796}
797
735static void __init calgary_enable_translation(struct pci_dev *dev) 798static void __init calgary_enable_translation(struct pci_dev *dev)
736{ 799{
737 u32 val32; 800 u32 val32;
@@ -756,6 +819,13 @@ static void __init calgary_enable_translation(struct pci_dev *dev)
756 writel(cpu_to_be32(val32), target); 819 writel(cpu_to_be32(val32), target);
757 readl(target); /* flush */ 820 readl(target); /* flush */
758 821
822 /*
823 * Give split completion a longer timeout on bus 1 for aic94xx
824 * http://bugzilla.kernel.org/show_bug.cgi?id=7180
825 */
826 if (busnum == 1)
827 calgary_increase_split_completion_timeout(bbar, busnum);
828
759 init_timer(&tbl->watchdog_timer); 829 init_timer(&tbl->watchdog_timer);
760 tbl->watchdog_timer.function = &calgary_watchdog; 830 tbl->watchdog_timer.function = &calgary_watchdog;
761 tbl->watchdog_timer.data = (unsigned long)dev; 831 tbl->watchdog_timer.data = (unsigned long)dev;
@@ -786,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
786 del_timer_sync(&tbl->watchdog_timer); 856 del_timer_sync(&tbl->watchdog_timer);
787} 857}
788 858
789static inline unsigned int __init locate_register_space(struct pci_dev *dev)
790{
791 int rionodeid;
792 u32 address;
793
794 /*
795 * Each Calgary has four busses. The first four busses (first Calgary)
796 * have RIO node ID 2, then the next four (second Calgary) have RIO
797 * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
798 * We use a gross hack - relying on the dev->bus->number ordering,
799 * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
800 * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
801 * second (id 3), and then it repeats modulo 14.
802 */
803 rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
804 /*
805 * register space address calculation as follows:
806 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
807 * ChassisBase is always zero for x366/x260/x460
808 * RioNodeId is 2 for first Calgary, 3 for second Calgary
809 */
810 address = START_ADDRESS -
811 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
812 (0x100000) * (rionodeid - CHASSIS_BASE);
813 return address;
814}
815
816static void __init calgary_init_one_nontraslated(struct pci_dev *dev) 859static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
817{ 860{
818 pci_dev_get(dev); 861 pci_dev_get(dev);
@@ -822,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
822 865
823static int __init calgary_init_one(struct pci_dev *dev) 866static int __init calgary_init_one(struct pci_dev *dev)
824{ 867{
825 u32 address;
826 void __iomem *bbar; 868 void __iomem *bbar;
827 int ret; 869 int ret;
828 870
829 BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); 871 BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
830 872
831 address = locate_register_space(dev); 873 bbar = busno_to_bbar(dev->bus->number);
832 /* map entire 1MB of Calgary config space */
833 bbar = ioremap_nocache(address, 1024 * 1024);
834 if (!bbar) {
835 ret = -ENODATA;
836 goto done;
837 }
838
839 ret = calgary_setup_tar(dev, bbar); 874 ret = calgary_setup_tar(dev, bbar);
840 if (ret) 875 if (ret)
841 goto iounmap; 876 goto done;
842 877
843 pci_dev_get(dev); 878 pci_dev_get(dev);
844 dev->bus->self = dev; 879 dev->bus->self = dev;
@@ -846,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev)
846 881
847 return 0; 882 return 0;
848 883
849iounmap:
850 iounmap(bbar);
851done: 884done:
852 return ret; 885 return ret;
853} 886}
854 887
888static int __init calgary_locate_bbars(void)
889{
890 int ret;
891 int rioidx, phb, bus;
892 void __iomem *bbar;
893 void __iomem *target;
894 unsigned long offset;
895 u8 start_bus, end_bus;
896 u32 val;
897
898 ret = -ENODATA;
899 for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
900 struct rio_detail *rio = rio_devs[rioidx];
901
902 if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
903 continue;
904
905 /* map entire 1MB of Calgary config space */
906 bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
907 if (!bbar)
908 goto error;
909
910 for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
911 offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
912 target = calgary_reg(bbar, offset);
913
914 val = be32_to_cpu(readl(target));
915 start_bus = (u8)((val & 0x00FF0000) >> 16);
916 end_bus = (u8)((val & 0x0000FF00) >> 8);
917 for (bus = start_bus; bus <= end_bus; bus++) {
918 bus_info[bus].bbar = bbar;
919 bus_info[bus].phbid = phb;
920 }
921 }
922 }
923
924 return 0;
925
926error:
927 /* scan bus_info and iounmap any bbars we previously ioremap'd */
928 for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
929 if (bus_info[bus].bbar)
930 iounmap(bus_info[bus].bbar);
931
932 return ret;
933}
934
855static int __init calgary_init(void) 935static int __init calgary_init(void)
856{ 936{
857 int ret = -ENODEV; 937 int ret;
858 struct pci_dev *dev = NULL; 938 struct pci_dev *dev = NULL;
859 939
940 ret = calgary_locate_bbars();
941 if (ret)
942 return ret;
943
860 do { 944 do {
861 dev = pci_get_device(PCI_VENDOR_ID_IBM, 945 dev = pci_get_device(PCI_VENDOR_ID_IBM,
862 PCI_DEVICE_ID_IBM_CALGARY, 946 PCI_DEVICE_ID_IBM_CALGARY,
@@ -879,7 +963,7 @@ static int __init calgary_init(void)
879 963
880error: 964error:
881 do { 965 do {
882 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, 966 dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
883 PCI_DEVICE_ID_IBM_CALGARY, 967 PCI_DEVICE_ID_IBM_CALGARY,
884 dev); 968 dev);
885 if (!dev) 969 if (!dev)
@@ -920,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram)
920 return ret; 1004 return ret;
921} 1005}
922 1006
1007static int __init build_detail_arrays(void)
1008{
1009 unsigned long ptr;
1010 int i, scal_detail_size, rio_detail_size;
1011
1012 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
1013 printk(KERN_WARNING
1014 "Calgary: MAX_NUMNODES too low! Defined as %d, "
1015 "but system has %d nodes.\n",
1016 MAX_NUMNODES, rio_table_hdr->num_scal_dev);
1017 return -ENODEV;
1018 }
1019
1020 switch (rio_table_hdr->version){
1021 case 2:
1022 scal_detail_size = 11;
1023 rio_detail_size = 13;
1024 break;
1025 case 3:
1026 scal_detail_size = 12;
1027 rio_detail_size = 15;
1028 break;
1029 default:
1030 printk(KERN_WARNING
1031 "Calgary: Invalid Rio Grande Table Version: %d\n",
1032 rio_table_hdr->version);
1033 return -EPROTO;
1034 }
1035
1036 ptr = ((unsigned long)rio_table_hdr) + 3;
1037 for (i = 0; i < rio_table_hdr->num_scal_dev;
1038 i++, ptr += scal_detail_size)
1039 scal_devs[i] = (struct scal_detail *)ptr;
1040
1041 for (i = 0; i < rio_table_hdr->num_rio_dev;
1042 i++, ptr += rio_detail_size)
1043 rio_devs[i] = (struct rio_detail *)ptr;
1044
1045 return 0;
1046}
1047
923void __init detect_calgary(void) 1048void __init detect_calgary(void)
924{ 1049{
925 u32 val; 1050 u32 val;
926 int bus; 1051 int bus;
927 void *tbl; 1052 void *tbl;
928 int calgary_found = 0; 1053 int calgary_found = 0;
929 int phb = -1; 1054 unsigned long ptr;
1055 int offset;
1056 int ret;
930 1057
931 /* 1058 /*
932 * if the user specified iommu=off or iommu=soft or we found 1059 * if the user specified iommu=off or iommu=soft or we found
@@ -935,25 +1062,47 @@ void __init detect_calgary(void)
935 if (swiotlb || no_iommu || iommu_detected) 1062 if (swiotlb || no_iommu || iommu_detected)
936 return; 1063 return;
937 1064
1065 if (!use_calgary)
1066 return;
1067
938 if (!early_pci_allowed()) 1068 if (!early_pci_allowed())
939 return; 1069 return;
940 1070
1071 ptr = (unsigned long)phys_to_virt(get_bios_ebda());
1072
1073 rio_table_hdr = NULL;
1074 offset = 0x180;
1075 while (offset) {
1076 /* The block id is stored in the 2nd word */
1077 if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
1078 /* set the pointer past the offset & block id */
1079 rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
1080 break;
1081 }
1082 /* The next offset is stored in the 1st word. 0 means no more */
1083 offset = *((unsigned short *)(ptr + offset));
1084 }
1085 if (!rio_table_hdr) {
1086 printk(KERN_ERR "Calgary: Unable to locate "
1087 "Rio Grande Table in EBDA - bailing!\n");
1088 return;
1089 }
1090
1091 ret = build_detail_arrays();
1092 if (ret) {
1093 printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret);
1094 return;
1095 }
1096
941 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); 1097 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
942 1098
943 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 1099 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
944 int dev; 1100 int dev;
945 struct calgary_bus_info *info = &bus_info[bus]; 1101 struct calgary_bus_info *info = &bus_info[bus];
946 info->phbid = -1;
947 1102
948 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) 1103 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
949 continue; 1104 continue;
950 1105
951 /*
952 * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3)
953 * it is connected to releative to the clagary chip.
954 */
955 phb = (phb + 1) % PHBS_PER_CALGARY;
956
957 if (info->translation_disabled) 1106 if (info->translation_disabled)
958 continue; 1107 continue;
959 1108
@@ -968,7 +1117,6 @@ void __init detect_calgary(void)
968 if (!tbl) 1117 if (!tbl)
969 goto cleanup; 1118 goto cleanup;
970 info->tce_space = tbl; 1119 info->tce_space = tbl;
971 info->phbid = phb;
972 calgary_found = 1; 1120 calgary_found = 1;
973 break; 1121 break;
974 } 1122 }
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d857453f8a..683b7a5c1ab3 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@ __init int iommu_setup(char *p)
296 gart_parse_options(p); 296 gart_parse_options(p);
297#endif 297#endif
298 298
299#ifdef CONFIG_CALGARY_IOMMU
300 if (!strncmp(p, "calgary", 7))
301 use_calgary = 1;
302#endif /* CONFIG_CALGARY_IOMMU */
303
299 p += strcspn(p, ","); 304 p += strcspn(p, ",");
300 if (*p == ',') 305 if (*p == ',')
301 ++p; 306 ++p;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8a3303..fc1960f1f243 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@ void __init gart_iommu_init(void)
601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
602 !iommu_aperture || 602 !iommu_aperture ||
603 (no_agp && init_k8_gatt(&info) < 0)) { 603 (no_agp && init_k8_gatt(&info) < 0)) {
604 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
605 if (end_pfn > MAX_DMA32_PFN) { 604 if (end_pfn > MAX_DMA32_PFN) {
606 printk(KERN_ERR "WARNING more than 4GB of memory " 605 printk(KERN_ERR "WARNING more than 4GB of memory "
607 "but IOMMU not available.\n" 606 "but GART IOMMU not available.\n"
608 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 607 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
609 } 608 }
610 return; 609 return;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 49f7fac6229e..a418ee4c8c62 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -88,9 +88,8 @@ void enter_idle(void)
88 88
89static void __exit_idle(void) 89static void __exit_idle(void)
90{ 90{
91 if (read_pda(isidle) == 0) 91 if (test_and_clear_bit_pda(0, isidle) == 0)
92 return; 92 return;
93 write_pda(isidle, 0);
94 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 93 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
95} 94}
96 95
@@ -109,17 +108,15 @@ void exit_idle(void)
109 */ 108 */
110static void default_idle(void) 109static void default_idle(void)
111{ 110{
112 local_irq_enable();
113
114 current_thread_info()->status &= ~TS_POLLING; 111 current_thread_info()->status &= ~TS_POLLING;
115 smp_mb__after_clear_bit(); 112 smp_mb__after_clear_bit();
116 while (!need_resched()) { 113 local_irq_disable();
117 local_irq_disable(); 114 if (!need_resched()) {
118 if (!need_resched()) 115 /* Enables interrupts one instruction before HLT.
119 safe_halt(); 116 x86 special cases this so there is no race. */
120 else 117 safe_halt();
121 local_irq_enable(); 118 } else
122 } 119 local_irq_enable();
123 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
124} 121}
125 122
@@ -131,21 +128,13 @@ static void default_idle(void)
131static void poll_idle (void) 128static void poll_idle (void)
132{ 129{
133 local_irq_enable(); 130 local_irq_enable();
134 131 cpu_relax();
135 asm volatile(
136 "2:"
137 "testl %0,%1;"
138 "rep; nop;"
139 "je 2b;"
140 : :
141 "i" (_TIF_NEED_RESCHED),
142 "m" (current_thread_info()->flags));
143} 132}
144 133
145void cpu_idle_wait(void) 134void cpu_idle_wait(void)
146{ 135{
147 unsigned int cpu, this_cpu = get_cpu(); 136 unsigned int cpu, this_cpu = get_cpu();
148 cpumask_t map; 137 cpumask_t map, tmp = current->cpus_allowed;
149 138
150 set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); 139 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
151 put_cpu(); 140 put_cpu();
@@ -168,6 +157,8 @@ void cpu_idle_wait(void)
168 } 157 }
169 cpus_and(map, map, cpu_online_map); 158 cpus_and(map, map, cpu_online_map);
170 } while (!cpus_empty(map)); 159 } while (!cpus_empty(map));
160
161 set_cpus_allowed(current, tmp);
171} 162}
172EXPORT_SYMBOL_GPL(cpu_idle_wait); 163EXPORT_SYMBOL_GPL(cpu_idle_wait);
173 164
@@ -218,6 +209,12 @@ void cpu_idle (void)
218 idle = default_idle; 209 idle = default_idle;
219 if (cpu_is_offline(smp_processor_id())) 210 if (cpu_is_offline(smp_processor_id()))
220 play_dead(); 211 play_dead();
212 /*
213 * Idle routines should keep interrupts disabled
214 * from here on, until they go to idle.
215 * Otherwise, idle callbacks can misfire.
216 */
217 local_irq_disable();
221 enter_idle(); 218 enter_idle();
222 idle(); 219 idle();
223 /* In many cases the interrupt that ended idle 220 /* In many cases the interrupt that ended idle
@@ -255,9 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
255/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 252/* Default MONITOR/MWAIT with no hints, used for default C1 state */
256static void mwait_idle(void) 253static void mwait_idle(void)
257{ 254{
258 local_irq_enable(); 255 if (!need_resched()) {
259 while (!need_resched()) 256 __monitor((void *)&current_thread_info()->flags, 0, 0);
260 mwait_idle_with_hints(0,0); 257 smp_mb();
258 if (!need_resched())
259 __sti_mwait(0, 0);
260 else
261 local_irq_enable();
262 } else {
263 local_irq_enable();
264 }
261} 265}
262 266
263void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 267void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fc944b5e8f4a..af425a8049fb 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p)
471 if (LOADER_TYPE && INITRD_START) { 471 if (LOADER_TYPE && INITRD_START) {
472 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { 472 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
473 reserve_bootmem_generic(INITRD_START, INITRD_SIZE); 473 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
474 initrd_start = 474 initrd_start = INITRD_START + PAGE_OFFSET;
475 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
476 initrd_end = initrd_start+INITRD_SIZE; 475 initrd_end = initrd_start+INITRD_SIZE;
477 } 476 }
478 else { 477 else {
@@ -732,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
732 /* Fix cpuid4 emulation for more */ 731 /* Fix cpuid4 emulation for more */
733 num_cache_leaves = 3; 732 num_cache_leaves = 3;
734 733
735 /* When there is only one core no need to synchronize RDTSC */ 734 /* RDTSC can be speculated around */
736 if (num_possible_cpus() == 1) 735 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
737 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
738 else
739 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
740} 736}
741 737
742static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 738static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -835,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
835 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); 831 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
836 } 832 }
837 833
834 if (cpu_has_ds) {
835 unsigned int l1, l2;
836 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
837 if (!(l1 & (1<<11)))
838 set_bit(X86_FEATURE_BTS, c->x86_capability);
839 if (!(l1 & (1<<12)))
840 set_bit(X86_FEATURE_PEBS, c->x86_capability);
841 }
842
838 n = c->extended_cpuid_level; 843 n = c->extended_cpuid_level;
839 if (n >= 0x80000008) { 844 if (n >= 0x80000008) {
840 unsigned eax = cpuid_eax(0x80000008); 845 unsigned eax = cpuid_eax(0x80000008);
@@ -854,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
854 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 859 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
855 if (c->x86 == 6) 860 if (c->x86 == 6)
856 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); 861 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
857 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 862 if (c->x86 == 15)
863 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
864 else
865 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
858 c->x86_max_cores = intel_num_cpu_cores(c); 866 c->x86_max_cores = intel_num_cpu_cores(c);
859 867
860 srat_detect_node(); 868 srat_detect_node();
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4f67697f5036..af1ec4d23cf8 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -376,16 +376,20 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
376 /* prevent preemption and reschedule on another processor */ 376 /* prevent preemption and reschedule on another processor */
377 int me = get_cpu(); 377 int me = get_cpu();
378 if (cpu == me) { 378 if (cpu == me) {
379 WARN_ON(1);
380 put_cpu(); 379 put_cpu();
381 return -EBUSY; 380 return 0;
382 } 381 }
382
383 /* Can deadlock when called with interrupts disabled */
384 WARN_ON(irqs_disabled());
385
383 spin_lock_bh(&call_lock); 386 spin_lock_bh(&call_lock);
384 __smp_call_function_single(cpu, func, info, nonatomic, wait); 387 __smp_call_function_single(cpu, func, info, nonatomic, wait);
385 spin_unlock_bh(&call_lock); 388 spin_unlock_bh(&call_lock);
386 put_cpu(); 389 put_cpu();
387 return 0; 390 return 0;
388} 391}
392EXPORT_SYMBOL(smp_call_function_single);
389 393
390/* 394/*
391 * this function sends a 'generic call function' IPI to all other CPUs 395 * this function sends a 'generic call function' IPI to all other CPUs
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7b7a6870288a..daf19332f0dd 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
63 64
64/* Number of siblings per CPU package */ 65/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 66int smp_num_siblings = 1;
@@ -581,12 +582,16 @@ void __cpuinit start_secondary(void)
581 * smp_call_function(). 582 * smp_call_function().
582 */ 583 */
583 lock_ipi_call_lock(); 584 lock_ipi_call_lock();
585 spin_lock(&vector_lock);
584 586
587 /* Setup the per cpu irq handling data structures */
588 __setup_vector_irq(smp_processor_id());
585 /* 589 /*
586 * Allow the master to continue. 590 * Allow the master to continue.
587 */ 591 */
588 cpu_set(smp_processor_id(), cpu_online_map); 592 cpu_set(smp_processor_id(), cpu_online_map);
589 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 593 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
594 spin_unlock(&vector_lock);
590 unlock_ipi_call_lock(); 595 unlock_ipi_call_lock();
591 596
592 cpu_idle(); 597 cpu_idle();
@@ -749,14 +754,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
749} 754}
750 755
751struct create_idle { 756struct create_idle {
757 struct work_struct work;
752 struct task_struct *idle; 758 struct task_struct *idle;
753 struct completion done; 759 struct completion done;
754 int cpu; 760 int cpu;
755}; 761};
756 762
757void do_fork_idle(void *_c_idle) 763void do_fork_idle(struct work_struct *work)
758{ 764{
759 struct create_idle *c_idle = _c_idle; 765 struct create_idle *c_idle =
766 container_of(work, struct create_idle, work);
760 767
761 c_idle->idle = fork_idle(c_idle->cpu); 768 c_idle->idle = fork_idle(c_idle->cpu);
762 complete(&c_idle->done); 769 complete(&c_idle->done);
@@ -771,10 +778,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
771 int timeout; 778 int timeout;
772 unsigned long start_rip; 779 unsigned long start_rip;
773 struct create_idle c_idle = { 780 struct create_idle c_idle = {
781 .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
774 .cpu = cpu, 782 .cpu = cpu,
775 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 783 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
776 }; 784 };
777 DECLARE_WORK(work, do_fork_idle, &c_idle);
778 785
779 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 786 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
780 if (!cpu_gdt_descr[cpu].address && 787 if (!cpu_gdt_descr[cpu].address &&
@@ -799,7 +806,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
799 cpu, node); 806 cpu, node);
800 } 807 }
801 808
802
803 alternatives_smp_switch(1); 809 alternatives_smp_switch(1);
804 810
805 c_idle.idle = get_idle_for_cpu(cpu); 811 c_idle.idle = get_idle_for_cpu(cpu);
@@ -822,9 +828,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
822 * thread. 828 * thread.
823 */ 829 */
824 if (!keventd_up() || current_is_keventd()) 830 if (!keventd_up() || current_is_keventd())
825 work.func(work.data); 831 c_idle.work.func(&c_idle.work);
826 else { 832 else {
827 schedule_work(&work); 833 schedule_work(&c_idle.work);
828 wait_for_completion(&c_idle.done); 834 wait_for_completion(&c_idle.done);
829 } 835 }
830 836
@@ -1164,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu)
1164 1170
1165 while (!cpu_isset(cpu, cpu_online_map)) 1171 while (!cpu_isset(cpu, cpu_online_map))
1166 cpu_relax(); 1172 cpu_relax();
1173
1174 if (num_online_cpus() > 8 && genapic == &apic_flat) {
1175 printk(KERN_WARNING
1176 "flat APIC routing can't be used with > 8 cpus\n");
1177 BUG();
1178 }
1179
1167 err = 0; 1180 err = 0;
1168 1181
1169 return err; 1182 return err;
@@ -1246,8 +1259,10 @@ int __cpu_disable(void)
1246 local_irq_disable(); 1259 local_irq_disable();
1247 remove_siblinginfo(cpu); 1260 remove_siblinginfo(cpu);
1248 1261
1262 spin_lock(&vector_lock);
1249 /* It's now safe to remove this processor from the online map */ 1263 /* It's now safe to remove this processor from the online map */
1250 cpu_clear(cpu, cpu_online_map); 1264 cpu_clear(cpu, cpu_online_map);
1265 spin_unlock(&vector_lock);
1251 remove_cpu_from_maps(); 1266 remove_cpu_from_maps();
1252 fixup_irqs(cpu_online_map); 1267 fixup_irqs(cpu_online_map);
1253 return 0; 1268 return 0;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 1ba5a442ac32..9f05bc9b2dad 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0;
563static unsigned int cpufreq_init = 0; 563static unsigned int cpufreq_init = 0;
564static struct work_struct cpufreq_delayed_get_work; 564static struct work_struct cpufreq_delayed_get_work;
565 565
566static void handle_cpufreq_delayed_get(void *v) 566static void handle_cpufreq_delayed_get(struct work_struct *v)
567{ 567{
568 unsigned int cpu; 568 unsigned int cpu;
569 for_each_online_cpu(cpu) { 569 for_each_online_cpu(cpu) {
@@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = {
639 639
640static int __init cpufreq_tsc(void) 640static int __init cpufreq_tsc(void)
641{ 641{
642 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); 642 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
643 if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, 643 if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
644 CPUFREQ_TRANSITION_NOTIFIER)) 644 CPUFREQ_TRANSITION_NOTIFIER))
645 cpufreq_init = 1; 645 cpufreq_init = 1;
@@ -876,15 +876,6 @@ static struct irqaction irq0 = {
876 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL 876 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
877}; 877};
878 878
879static int __cpuinit
880time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
881{
882 unsigned cpu = (unsigned long) hcpu;
883 if (action == CPU_ONLINE)
884 vsyscall_set_cpu(cpu);
885 return NOTIFY_DONE;
886}
887
888void __init time_init(void) 879void __init time_init(void)
889{ 880{
890 if (nohpet) 881 if (nohpet)
@@ -925,8 +916,6 @@ void __init time_init(void)
925 vxtime.last_tsc = get_cycles_sync(); 916 vxtime.last_tsc = get_cycles_sync();
926 set_cyc2ns_scale(cpu_khz); 917 set_cyc2ns_scale(cpu_khz);
927 setup_irq(0, &irq0); 918 setup_irq(0, &irq0);
928 hotcpu_notifier(time_cpu_notifier, 0);
929 time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
930 919
931#ifndef CONFIG_SMP 920#ifndef CONFIG_SMP
932 time_init_gtod(); 921 time_init_gtod();
@@ -948,7 +937,7 @@ __cpuinit int unsynchronized_tsc(void)
948 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { 937 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
949#ifdef CONFIG_ACPI 938#ifdef CONFIG_ACPI
950 /* But TSC doesn't tick in C3 so don't use it there */ 939 /* But TSC doesn't tick in C3 so don't use it there */
951 if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100) 940 if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 1000)
952 return 1; 941 return 1;
953#endif 942#endif
954 return 0; 943 return 0;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 7819022a8db5..b54ccc07f379 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,10 @@
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h> 32#include <linux/unwind.h>
33#include <linux/uaccess.h>
34#include <linux/bug.h>
33 35
34#include <asm/system.h> 36#include <asm/system.h>
35#include <asm/uaccess.h>
36#include <asm/io.h> 37#include <asm/io.h>
37#include <asm/atomic.h> 38#include <asm/atomic.h>
38#include <asm/debugreg.h> 39#include <asm/debugreg.h>
@@ -108,7 +109,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108 preempt_enable_no_resched(); 109 preempt_enable_no_resched();
109} 110}
110 111
111static int kstack_depth_to_print = 12; 112int kstack_depth_to_print = 12;
112#ifdef CONFIG_STACK_UNWIND 113#ifdef CONFIG_STACK_UNWIND
113static int call_trace = 1; 114static int call_trace = 1;
114#else 115#else
@@ -225,16 +226,25 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
225{ 226{
226 struct ops_and_data *oad = (struct ops_and_data *)context; 227 struct ops_and_data *oad = (struct ops_and_data *)context;
227 int n = 0; 228 int n = 0;
229 unsigned long sp = UNW_SP(info);
228 230
231 if (arch_unw_user_mode(info))
232 return -1;
229 while (unwind(info) == 0 && UNW_PC(info)) { 233 while (unwind(info) == 0 && UNW_PC(info)) {
230 n++; 234 n++;
231 oad->ops->address(oad->data, UNW_PC(info)); 235 oad->ops->address(oad->data, UNW_PC(info));
232 if (arch_unw_user_mode(info)) 236 if (arch_unw_user_mode(info))
233 break; 237 break;
238 if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
239 && sp > UNW_SP(info))
240 break;
241 sp = UNW_SP(info);
234 } 242 }
235 return n; 243 return n;
236} 244}
237 245
246#define MSG(txt) ops->warning(data, txt)
247
238/* 248/*
239 * x86-64 can have upto three kernel stacks: 249 * x86-64 can have upto three kernel stacks:
240 * process stack 250 * process stack
@@ -242,12 +252,20 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
242 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 252 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
243 */ 253 */
244 254
245void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, 255static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
256{
257 void *t = (void *)tinfo;
258 return p > t && p < t + THREAD_SIZE - 3;
259}
260
261void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
262 unsigned long *stack,
246 struct stacktrace_ops *ops, void *data) 263 struct stacktrace_ops *ops, void *data)
247{ 264{
248 const unsigned cpu = smp_processor_id(); 265 const unsigned cpu = get_cpu();
249 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 266 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
250 unsigned used = 0; 267 unsigned used = 0;
268 struct thread_info *tinfo;
251 269
252 if (!tsk) 270 if (!tsk)
253 tsk = current; 271 tsk = current;
@@ -261,28 +279,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
261 if (unwind_init_frame_info(&info, tsk, regs) == 0) 279 if (unwind_init_frame_info(&info, tsk, regs) == 0)
262 unw_ret = dump_trace_unwind(&info, &oad); 280 unw_ret = dump_trace_unwind(&info, &oad);
263 } else if (tsk == current) 281 } else if (tsk == current)
264 unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); 282 unw_ret = unwind_init_running(&info, dump_trace_unwind,
283 &oad);
265 else { 284 else {
266 if (unwind_init_blocked(&info, tsk) == 0) 285 if (unwind_init_blocked(&info, tsk) == 0)
267 unw_ret = dump_trace_unwind(&info, &oad); 286 unw_ret = dump_trace_unwind(&info, &oad);
268 } 287 }
269 if (unw_ret > 0) { 288 if (unw_ret > 0) {
270 if (call_trace == 1 && !arch_unw_user_mode(&info)) { 289 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
271 ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", 290 ops->warning_symbol(data,
291 "DWARF2 unwinder stuck at %s",
272 UNW_PC(&info)); 292 UNW_PC(&info));
273 if ((long)UNW_SP(&info) < 0) { 293 if ((long)UNW_SP(&info) < 0) {
274 ops->warning(data, "Leftover inexact backtrace:\n"); 294 MSG("Leftover inexact backtrace:");
275 stack = (unsigned long *)UNW_SP(&info); 295 stack = (unsigned long *)UNW_SP(&info);
276 if (!stack) 296 if (!stack)
277 return; 297 goto out;
278 } else 298 } else
279 ops->warning(data, "Full inexact backtrace again:\n"); 299 MSG("Full inexact backtrace again:");
280 } else if (call_trace >= 1) 300 } else if (call_trace >= 1)
281 return; 301 goto out;
282 else 302 else
283 ops->warning(data, "Full inexact backtrace again:\n"); 303 MSG("Full inexact backtrace again:");
284 } else 304 } else
285 ops->warning(data, "Inexact backtrace:\n"); 305 MSG("Inexact backtrace:");
286 } 306 }
287 if (!stack) { 307 if (!stack) {
288 unsigned long dummy; 308 unsigned long dummy;
@@ -299,9 +319,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
299#define HANDLE_STACK(cond) \ 319#define HANDLE_STACK(cond) \
300 do while (cond) { \ 320 do while (cond) { \
301 unsigned long addr = *stack++; \ 321 unsigned long addr = *stack++; \
302 if (oops_in_progress ? \ 322 /* Use unlocked access here because except for NMIs \
303 __kernel_text_address(addr) : \ 323 we should be already protected against module unloads */ \
304 kernel_text_address(addr)) { \ 324 if (__kernel_text_address(addr)) { \
305 /* \ 325 /* \
306 * If the address is either in the text segment of the \ 326 * If the address is either in the text segment of the \
307 * kernel, or in the region which contains vmalloc'ed \ 327 * kernel, or in the region which contains vmalloc'ed \
@@ -364,8 +384,11 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
364 /* 384 /*
365 * This handles the process stack: 385 * This handles the process stack:
366 */ 386 */
367 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); 387 tinfo = current_thread_info();
388 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
368#undef HANDLE_STACK 389#undef HANDLE_STACK
390out:
391 put_cpu();
369} 392}
370EXPORT_SYMBOL(dump_trace); 393EXPORT_SYMBOL(dump_trace);
371 394
@@ -502,30 +525,15 @@ bad:
502 printk("\n"); 525 printk("\n");
503} 526}
504 527
505void handle_BUG(struct pt_regs *regs) 528int is_valid_bugaddr(unsigned long rip)
506{ 529{
507 struct bug_frame f; 530 unsigned short ud2;
508 long len;
509 const char *prefix = "";
510 531
511 if (user_mode(regs)) 532 if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
512 return; 533 return 0;
513 if (__copy_from_user(&f, (const void __user *) regs->rip, 534
514 sizeof(struct bug_frame))) 535 return ud2 == 0x0b0f;
515 return; 536}
516 if (f.filename >= 0 ||
517 f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
518 return;
519 len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
520 if (len < 0 || len >= PATH_MAX)
521 f.filename = (int)(long)"unmapped filename";
522 else if (len > 50) {
523 f.filename += len - 50;
524 prefix = "...";
525 }
526 printk("----------- [cut here ] --------- [please bite here ] ---------\n");
527 printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
528}
529 537
530#ifdef CONFIG_BUG 538#ifdef CONFIG_BUG
531void out_of_line_bug(void) 539void out_of_line_bug(void)
@@ -605,7 +613,9 @@ void die(const char * str, struct pt_regs * regs, long err)
605{ 613{
606 unsigned long flags = oops_begin(); 614 unsigned long flags = oops_begin();
607 615
608 handle_BUG(regs); 616 if (!user_mode(regs))
617 report_bug(regs->rip);
618
609 __die(str, regs, err); 619 __die(str, regs, err);
610 oops_end(flags); 620 oops_end(flags);
611 do_exit(SIGSEGV); 621 do_exit(SIGSEGV);
@@ -772,8 +782,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
772{ 782{
773 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 783 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
774 reason); 784 reason);
775 printk(KERN_EMERG "You probably have a hardware problem with your " 785 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
776 "RAM chips\n");
777 786
778 if (panic_on_unrecovered_nmi) 787 if (panic_on_unrecovered_nmi)
779 panic("NMI: Not continuing"); 788 panic("NMI: Not continuing");
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b9df2ab6529f..514be5dd2303 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -13,10 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 13OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 14ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 15jiffies_64 = jiffies;
16_proxy_pda = 0;
16PHDRS { 17PHDRS {
17 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
18 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
19 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */
20 note PT_NOTE FLAGS(4); /* R__ */ 22 note PT_NOTE FLAGS(4); /* R__ */
21} 23}
22SECTIONS 24SECTIONS
@@ -50,15 +52,9 @@ SECTIONS
50 52
51 RODATA 53 RODATA
52 54
53#ifdef CONFIG_STACK_UNWIND 55 BUG_TABLE
54 . = ALIGN(8);
55 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
56 __start_unwind = .;
57 *(.eh_frame)
58 __end_unwind = .;
59 }
60#endif
61 56
57 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
62 /* Data */ 58 /* Data */
63 .data : AT(ADDR(.data) - LOAD_OFFSET) { 59 .data : AT(ADDR(.data) - LOAD_OFFSET) {
64 *(.data) 60 *(.data)
@@ -131,7 +127,7 @@ SECTIONS
131 . = ALIGN(8192); /* init_task */ 127 . = ALIGN(8192); /* init_task */
132 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 128 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
133 *(.data.init_task) 129 *(.data.init_task)
134 } :data 130 }:data.init
135 131
136 . = ALIGN(4096); 132 . = ALIGN(4096);
137 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 133 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
@@ -174,13 +170,7 @@ SECTIONS
174 __setup_end = .; 170 __setup_end = .;
175 __initcall_start = .; 171 __initcall_start = .;
176 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { 172 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
177 *(.initcall1.init) 173 INITCALLS
178 *(.initcall2.init)
179 *(.initcall3.init)
180 *(.initcall4.init)
181 *(.initcall5.init)
182 *(.initcall6.init)
183 *(.initcall7.init)
184 } 174 }
185 __initcall_end = .; 175 __initcall_end = .;
186 __con_initcall_start = .; 176 __con_initcall_start = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index a98b460af6a1..2433d6fc68b1 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -27,6 +27,9 @@
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/sysctl.h> 28#include <linux/sysctl.h>
29#include <linux/getcpu.h> 29#include <linux/getcpu.h>
30#include <linux/cpu.h>
31#include <linux/smp.h>
32#include <linux/notifier.h>
30 33
31#include <asm/vsyscall.h> 34#include <asm/vsyscall.h>
32#include <asm/pgtable.h> 35#include <asm/pgtable.h>
@@ -39,6 +42,7 @@
39#include <asm/topology.h> 42#include <asm/topology.h>
40 43
41#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 44#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
45#define __syscall_clobber "r11","rcx","memory"
42 46
43int __sysctl_vsyscall __section_sysctl_vsyscall = 1; 47int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
44seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; 48seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
@@ -221,8 +225,7 @@ out:
221 225
222static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, 226static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
223 void __user *oldval, size_t __user *oldlenp, 227 void __user *oldval, size_t __user *oldlenp,
224 void __user *newval, size_t newlen, 228 void __user *newval, size_t newlen)
225 void **context)
226{ 229{
227 return -ENOSYS; 230 return -ENOSYS;
228} 231}
@@ -243,32 +246,17 @@ static ctl_table kernel_root_table2[] = {
243 246
244#endif 247#endif
245 248
246static void __cpuinit write_rdtscp_cb(void *info) 249/* Assume __initcall executes before all user space. Hopefully kmod
247{ 250 doesn't violate that. We'll find out if it does. */
248 write_rdtscp_aux((unsigned long)info); 251static void __cpuinit vsyscall_set_cpu(int cpu)
249}
250
251void __cpuinit vsyscall_set_cpu(int cpu)
252{ 252{
253 unsigned long *d; 253 unsigned long *d;
254 unsigned long node = 0; 254 unsigned long node = 0;
255#ifdef CONFIG_NUMA 255#ifdef CONFIG_NUMA
256 node = cpu_to_node[cpu]; 256 node = cpu_to_node[cpu];
257#endif 257#endif
258 if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { 258 if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
259 void *info = (void *)((node << 12) | cpu); 259 write_rdtscp_aux((node << 12) | cpu);
260 /* Can happen on preemptive kernel */
261 if (get_cpu() == cpu)
262 write_rdtscp_cb(info);
263#ifdef CONFIG_SMP
264 else {
265 /* the notifier is unfortunately not executed on the
266 target CPU */
267 smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
268 }
269#endif
270 put_cpu();
271 }
272 260
273 /* Store cpu number in limit so that it can be loaded quickly 261 /* Store cpu number in limit so that it can be loaded quickly
274 in user space in vgetcpu. 262 in user space in vgetcpu.
@@ -280,11 +268,27 @@ void __cpuinit vsyscall_set_cpu(int cpu)
280 *d |= (node >> 4) << 48; 268 *d |= (node >> 4) << 48;
281} 269}
282 270
271static void __cpuinit cpu_vsyscall_init(void *arg)
272{
273 /* preemption should be already off */
274 vsyscall_set_cpu(raw_smp_processor_id());
275}
276
277static int __cpuinit
278cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
279{
280 long cpu = (long)arg;
281 if (action == CPU_ONLINE)
282 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
283 return NOTIFY_DONE;
284}
285
283static void __init map_vsyscall(void) 286static void __init map_vsyscall(void)
284{ 287{
285 extern char __vsyscall_0; 288 extern char __vsyscall_0;
286 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 289 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
287 290
291 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
288 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 292 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
289} 293}
290 294
@@ -299,6 +303,8 @@ static int __init vsyscall_init(void)
299#ifdef CONFIG_SYSCTL 303#ifdef CONFIG_SYSCTL
300 register_sysctl_table(kernel_root_table2, 0); 304 register_sysctl_table(kernel_root_table2, 0);
301#endif 305#endif
306 on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
307 hotcpu_notifier(cpu_vsyscall_notifier, 0);
302 return 0; 308 return 0;
303} 309}
304 310
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index c493735218da..bc503f506903 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -9,8 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/checksum.h> 10#include <asm/checksum.h>
11 11
12#define __force_inline inline __attribute__((always_inline))
13
14static inline unsigned short from32to16(unsigned a) 12static inline unsigned short from32to16(unsigned a)
15{ 13{
16 unsigned short b = a >> 16; 14 unsigned short b = a >> 16;
@@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a)
33 * Unrolling to an 128 bytes inner loop. 31 * Unrolling to an 128 bytes inner loop.
34 * Using interleaving with more registers to break the carry chains. 32 * Using interleaving with more registers to break the carry chains.
35 */ 33 */
36static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) 34static unsigned do_csum(const unsigned char *buff, unsigned len)
37{ 35{
38 unsigned odd, count; 36 unsigned odd, count;
39 unsigned long result = 0; 37 unsigned long result = 0;
@@ -132,9 +130,10 @@ static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
132 * 130 *
133 * it's best to have buff aligned on a 64-bit boundary 131 * it's best to have buff aligned on a 64-bit boundary
134 */ 132 */
135unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum) 133__wsum csum_partial(const void *buff, int len, __wsum sum)
136{ 134{
137 return add32_with_carry(do_csum(buff, len), sum); 135 return (__force __wsum)add32_with_carry(do_csum(buff, len),
136 (__force u32)sum);
138} 137}
139 138
140EXPORT_SYMBOL(csum_partial); 139EXPORT_SYMBOL(csum_partial);
@@ -143,7 +142,7 @@ EXPORT_SYMBOL(csum_partial);
143 * this routine is used for miscellaneous IP-like checksums, mainly 142 * this routine is used for miscellaneous IP-like checksums, mainly
144 * in icmp.c 143 * in icmp.c
145 */ 144 */
146unsigned short ip_compute_csum(unsigned char * buff, int len) 145__sum16 ip_compute_csum(const void *buff, int len)
147{ 146{
148 return csum_fold(csum_partial(buff,len,0)); 147 return csum_fold(csum_partial(buff,len,0));
149} 148}
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index b1320ec58428..fd42a4a095fc 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -18,9 +18,9 @@
18 * Returns an 32bit unfolded checksum of the buffer. 18 * Returns an 32bit unfolded checksum of the buffer.
19 * src and dst are best aligned to 64bits. 19 * src and dst are best aligned to 64bits.
20 */ 20 */
21unsigned int 21__wsum
22csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst, 22csum_partial_copy_from_user(const void __user *src, void *dst,
23 int len, unsigned int isum, int *errp) 23 int len, __wsum isum, int *errp)
24{ 24{
25 might_sleep(); 25 might_sleep();
26 *errp = 0; 26 *errp = 0;
@@ -34,17 +34,19 @@ csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst,
34 if (unlikely((unsigned long)src & 6)) { 34 if (unlikely((unsigned long)src & 6)) {
35 while (((unsigned long)src & 6) && len >= 2) { 35 while (((unsigned long)src & 6) && len >= 2) {
36 __u16 val16; 36 __u16 val16;
37 *errp = __get_user(val16, (__u16 __user *)src); 37 *errp = __get_user(val16, (const __u16 __user *)src);
38 if (*errp) 38 if (*errp)
39 return isum; 39 return isum;
40 *(__u16 *)dst = val16; 40 *(__u16 *)dst = val16;
41 isum = add32_with_carry(isum, val16); 41 isum = (__force __wsum)add32_with_carry(
42 (__force unsigned)isum, val16);
42 src += 2; 43 src += 2;
43 dst += 2; 44 dst += 2;
44 len -= 2; 45 len -= 2;
45 } 46 }
46 } 47 }
47 isum = csum_partial_copy_generic((__force void *)src,dst,len,isum,errp,NULL); 48 isum = csum_partial_copy_generic((__force const void *)src,
49 dst, len, isum, errp, NULL);
48 if (likely(*errp == 0)) 50 if (likely(*errp == 0))
49 return isum; 51 return isum;
50 } 52 }
@@ -66,9 +68,9 @@ EXPORT_SYMBOL(csum_partial_copy_from_user);
66 * Returns an 32bit unfolded checksum of the buffer. 68 * Returns an 32bit unfolded checksum of the buffer.
67 * src and dst are best aligned to 64bits. 69 * src and dst are best aligned to 64bits.
68 */ 70 */
69unsigned int 71__wsum
70csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst, 72csum_partial_copy_to_user(const void *src, void __user *dst,
71 int len, unsigned int isum, int *errp) 73 int len, __wsum isum, int *errp)
72{ 74{
73 might_sleep(); 75 might_sleep();
74 if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { 76 if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
@@ -79,7 +81,8 @@ csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst,
79 if (unlikely((unsigned long)dst & 6)) { 81 if (unlikely((unsigned long)dst & 6)) {
80 while (((unsigned long)dst & 6) && len >= 2) { 82 while (((unsigned long)dst & 6) && len >= 2) {
81 __u16 val16 = *(__u16 *)src; 83 __u16 val16 = *(__u16 *)src;
82 isum = add32_with_carry(isum, val16); 84 isum = (__force __wsum)add32_with_carry(
85 (__force unsigned)isum, val16);
83 *errp = __put_user(val16, (__u16 __user *)dst); 86 *errp = __put_user(val16, (__u16 __user *)dst);
84 if (*errp) 87 if (*errp)
85 return isum; 88 return isum;
@@ -104,19 +107,21 @@ EXPORT_SYMBOL(csum_partial_copy_to_user);
104 * 107 *
105 * Returns an 32bit unfolded checksum of the buffer. 108 * Returns an 32bit unfolded checksum of the buffer.
106 */ 109 */
107unsigned int 110__wsum
108csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, unsigned int sum) 111csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
109{ 112{
110 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); 113 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
111} 114}
112EXPORT_SYMBOL(csum_partial_copy_nocheck); 115EXPORT_SYMBOL(csum_partial_copy_nocheck);
113 116
114unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, 117__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
115 __u32 len, unsigned short proto, unsigned int sum) 118 const struct in6_addr *daddr,
119 __u32 len, unsigned short proto, __wsum sum)
116{ 120{
117 __u64 rest, sum64; 121 __u64 rest, sum64;
118 122
119 rest = (__u64)htonl(len) + (__u64)htons(proto) + (__u64)sum; 123 rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
124 (__force __u64)sum;
120 asm(" addq (%[saddr]),%[sum]\n" 125 asm(" addq (%[saddr]),%[sum]\n"
121 " adcq 8(%[saddr]),%[sum]\n" 126 " adcq 8(%[saddr]),%[sum]\n"
122 " adcq (%[daddr]),%[sum]\n" 127 " adcq (%[daddr]),%[sum]\n"
@@ -124,7 +129,7 @@ unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
124 " adcq $0,%[sum]\n" 129 " adcq $0,%[sum]\n"
125 : [sum] "=r" (sum64) 130 : [sum] "=r" (sum64)
126 : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr)); 131 : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
127 return csum_fold(add32_with_carry(sum64 & 0xffffffff, sum64>>32)); 132 return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
128} 133}
129 134
130EXPORT_SYMBOL(csum_ipv6_magic); 135EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 50be90975d04..2dbebd308347 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay);
40 40
41inline void __const_udelay(unsigned long xloops) 41inline void __const_udelay(unsigned long xloops)
42{ 42{
43 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); 43 __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
44} 44}
45EXPORT_SYMBOL(__const_udelay); 45EXPORT_SYMBOL(__const_udelay);
46 46
47void __udelay(unsigned long usecs) 47void __udelay(unsigned long usecs)
48{ 48{
49 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ 49 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
50} 50}
51EXPORT_SYMBOL(__udelay); 51EXPORT_SYMBOL(__udelay);
52 52
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 3751b4788e28..a65fc6f1dcaf 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,9 +23,9 @@
23#include <linux/compiler.h> 23#include <linux/compiler.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/kprobes.h> 25#include <linux/kprobes.h>
26#include <linux/uaccess.h>
26 27
27#include <asm/system.h> 28#include <asm/system.h>
28#include <asm/uaccess.h>
29#include <asm/pgalloc.h> 29#include <asm/pgalloc.h>
30#include <asm/smp.h> 30#include <asm/smp.h>
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
96static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, 96static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
97 unsigned long error_code) 97 unsigned long error_code)
98{ 98{
99 unsigned char __user *instr; 99 unsigned char *instr;
100 int scan_more = 1; 100 int scan_more = 1;
101 int prefetch = 0; 101 int prefetch = 0;
102 unsigned char *max_instr; 102 unsigned char *max_instr;
@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
116 unsigned char instr_hi; 116 unsigned char instr_hi;
117 unsigned char instr_lo; 117 unsigned char instr_lo;
118 118
119 if (__get_user(opcode, (char __user *)instr)) 119 if (probe_kernel_address(instr, opcode))
120 break; 120 break;
121 121
122 instr_hi = opcode & 0xf0; 122 instr_hi = opcode & 0xf0;
@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
154 case 0x00: 154 case 0x00:
155 /* Prefetch instruction is 0x0F0D or 0x0F18 */ 155 /* Prefetch instruction is 0x0F0D or 0x0F18 */
156 scan_more = 0; 156 scan_more = 0;
157 if (__get_user(opcode, (char __user *)instr)) 157 if (probe_kernel_address(instr, opcode))
158 break; 158 break;
159 prefetch = (instr_lo == 0xF) && 159 prefetch = (instr_lo == 0xF) &&
160 (opcode == 0x0D || opcode == 0x18); 160 (opcode == 0x0D || opcode == 0x18);
@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
170static int bad_address(void *p) 170static int bad_address(void *p)
171{ 171{
172 unsigned long dummy; 172 unsigned long dummy;
173 return __get_user(dummy, (unsigned long __user *)p); 173 return probe_kernel_address((unsigned long *)p, dummy);
174} 174}
175 175
176void dump_pagetable(unsigned long address) 176void dump_pagetable(unsigned long address)
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 971dc1181e69..2968b90ef8ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -496,7 +496,7 @@ int remove_memory(u64 start, u64 size)
496} 496}
497EXPORT_SYMBOL_GPL(remove_memory); 497EXPORT_SYMBOL_GPL(remove_memory);
498 498
499#ifndef CONFIG_ACPI_NUMA 499#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
500int memory_add_physaddr_to_nid(u64 start) 500int memory_add_physaddr_to_nid(u64 start)
501{ 501{
502 return 0; 502 return 0;
@@ -504,13 +504,6 @@ int memory_add_physaddr_to_nid(u64 start)
504EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 504EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
505#endif 505#endif
506 506
507#ifndef CONFIG_ACPI_NUMA
508int memory_add_physaddr_to_nid(u64 start)
509{
510 return 0;
511}
512#endif
513
514#endif /* CONFIG_MEMORY_HOTPLUG */ 507#endif /* CONFIG_MEMORY_HOTPLUG */
515 508
516#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE 509#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
@@ -655,9 +648,22 @@ void free_initrd_mem(unsigned long start, unsigned long end)
655 648
656void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 649void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
657{ 650{
658 /* Should check here against the e820 map to avoid double free */
659#ifdef CONFIG_NUMA 651#ifdef CONFIG_NUMA
660 int nid = phys_to_nid(phys); 652 int nid = phys_to_nid(phys);
653#endif
654 unsigned long pfn = phys >> PAGE_SHIFT;
655 if (pfn >= end_pfn) {
656 /* This can happen with kdump kernels when accessing firmware
657 tables. */
658 if (pfn < end_pfn_map)
659 return;
660 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
661 phys, len);
662 return;
663 }
664
665 /* Should check here against the e820 map to avoid double free */
666#ifdef CONFIG_NUMA
661 reserve_bootmem_node(NODE_DATA(nid), phys, len); 667 reserve_bootmem_node(NODE_DATA(nid), phys, len);
662#else 668#else
663 reserve_bootmem(phys, len); 669 reserve_bootmem(phys, len);
@@ -724,14 +730,15 @@ static __init int x8664_sysctl_init(void)
724__initcall(x8664_sysctl_init); 730__initcall(x8664_sysctl_init);
725#endif 731#endif
726 732
727/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only 733/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
728 covers the 64bit vsyscall page now. 32bit has a real VMA now and does 734 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
729 not need special handling anymore. */ 735 not need special handling anymore. */
730 736
731static struct vm_area_struct gate_vma = { 737static struct vm_area_struct gate_vma = {
732 .vm_start = VSYSCALL_START, 738 .vm_start = VSYSCALL_START,
733 .vm_end = VSYSCALL_END, 739 .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
734 .vm_page_prot = PAGE_READONLY 740 .vm_page_prot = PAGE_READONLY_EXEC,
741 .vm_flags = VM_READ | VM_EXEC
735}; 742};
736 743
737struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 744struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 3e231d762aaa..ccb91dd996a9 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
61 return base; 61 return base;
62} 62}
63 63
64 64static void cache_flush_page(void *adr)
65static void flush_kernel_map(void *address)
66{ 65{
67 if (0 && address && cpu_has_clflush) { 66 int i;
68 /* is this worth it? */ 67 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
69 int i; 68 asm volatile("clflush (%0)" :: "r" (adr + i));
70 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
71 asm volatile("clflush (%0)" :: "r" (address + i));
72 } else
73 asm volatile("wbinvd":::"memory");
74 if (address)
75 __flush_tlb_one(address);
76 else
77 __flush_tlb_all();
78} 69}
79 70
71static void flush_kernel_map(void *arg)
72{
73 struct list_head *l = (struct list_head *)arg;
74 struct page *pg;
75
76 /* When clflush is available always use it because it is
77 much cheaper than WBINVD */
78 if (!cpu_has_clflush)
79 asm volatile("wbinvd" ::: "memory");
80 list_for_each_entry(pg, l, lru) {
81 void *adr = page_address(pg);
82 if (cpu_has_clflush)
83 cache_flush_page(adr);
84 __flush_tlb_one(adr);
85 }
86}
80 87
81static inline void flush_map(unsigned long address) 88static inline void flush_map(struct list_head *l)
82{ 89{
83 on_each_cpu(flush_kernel_map, (void *)address, 1, 1); 90 on_each_cpu(flush_kernel_map, l, 1, 1);
84} 91}
85 92
86static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ 93static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
87 94
88static inline void save_page(struct page *fpage) 95static inline void save_page(struct page *fpage)
89{ 96{
90 fpage->lru.next = (struct list_head *)deferred_pages; 97 list_add(&fpage->lru, &deferred_pages);
91 deferred_pages = fpage;
92} 98}
93 99
94/* 100/*
@@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
207 213
208void global_flush_tlb(void) 214void global_flush_tlb(void)
209{ 215{
210 struct page *dpage; 216 struct page *pg, *next;
217 struct list_head l;
211 218
212 down_read(&init_mm.mmap_sem); 219 down_read(&init_mm.mmap_sem);
213 dpage = xchg(&deferred_pages, NULL); 220 list_replace_init(&deferred_pages, &l);
214 up_read(&init_mm.mmap_sem); 221 up_read(&init_mm.mmap_sem);
215 222
216 flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); 223 flush_map(&l);
217 while (dpage) { 224
218 struct page *tmp = dpage; 225 list_for_each_entry_safe(pg, next, &l, lru) {
219 dpage = (struct page *)dpage->lru.next; 226 ClearPagePrivate(pg);
220 ClearPagePrivate(tmp); 227 __free_page(pg);
221 __free_page(tmp);
222 } 228 }
223} 229}
224 230
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 3cc0544e25f5..1087e150a218 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -207,7 +207,7 @@ static inline int save_add_info(void)
207 return hotadd_percent > 0; 207 return hotadd_percent > 0;
208} 208}
209#else 209#else
210int update_end_of_memory(unsigned long end) {return 0;} 210int update_end_of_memory(unsigned long end) {return -1;}
211static int hotadd_enough_memory(struct bootnode *nd) {return 1;} 211static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
212#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 212#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
213static inline int save_add_info(void) {return 1;} 213static inline int save_add_info(void) {return 1;}
@@ -337,7 +337,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
337 push_node_boundaries(node, nd->start >> PAGE_SHIFT, 337 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
338 nd->end >> PAGE_SHIFT); 338 nd->end >> PAGE_SHIFT);
339 339
340 if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) { 340 if (ma->flags.hot_pluggable && (reserve_hotadd(node, start, end) < 0)) {
341 /* Ignore hotadd region. Undo damage */ 341 /* Ignore hotadd region. Undo damage */
342 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); 342 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
343 *nd = oldnode; 343 *nd = oldnode;
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
index 1eb18f421edf..149aba05a5b8 100644
--- a/arch/x86_64/pci/Makefile
+++ b/arch/x86_64/pci/Makefile
@@ -3,7 +3,7 @@
3# 3#
4# Reuse the i386 PCI subsystem 4# Reuse the i386 PCI subsystem
5# 5#
6CFLAGS += -Iarch/i386/pci 6EXTRA_CFLAGS += -Iarch/i386/pci
7 7
8obj-y := i386.o 8obj-y := i386.o
9obj-$(CONFIG_PCI_DIRECT)+= direct.o 9obj-$(CONFIG_PCI_DIRECT)+= direct.o
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 7732f4254d21..f8b6b2800a62 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -163,37 +163,6 @@ static __init void unreachable_devices(void)
163 } 163 }
164} 164}
165 165
166static __init void pci_mmcfg_insert_resources(void)
167{
168#define PCI_MMCFG_RESOURCE_NAME_LEN 19
169 int i;
170 struct resource *res;
171 char *names;
172 unsigned num_buses;
173
174 res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
175 pci_mmcfg_config_num, GFP_KERNEL);
176
177 if (!res) {
178 printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
179 return;
180 }
181
182 names = (void *)&res[pci_mmcfg_config_num];
183 for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
184 num_buses = pci_mmcfg_config[i].end_bus_number -
185 pci_mmcfg_config[i].start_bus_number + 1;
186 res->name = names;
187 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
188 pci_mmcfg_config[i].pci_segment_group_number);
189 res->start = pci_mmcfg_config[i].base_address;
190 res->end = res->start + (num_buses << 20) - 1;
191 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
192 insert_resource(&iomem_resource, res);
193 names += PCI_MMCFG_RESOURCE_NAME_LEN;
194 }
195}
196
197void __init pci_mmcfg_init(int type) 166void __init pci_mmcfg_init(int type)
198{ 167{
199 int i; 168 int i;
@@ -220,7 +189,7 @@ void __init pci_mmcfg_init(int type)
220 189
221 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); 190 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
222 if (pci_mmcfg_virt == NULL) { 191 if (pci_mmcfg_virt == NULL) {
223 printk("PCI: Can not allocate memory for mmconfig structures\n"); 192 printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
224 return; 193 return;
225 } 194 }
226 for (i = 0; i < pci_mmcfg_config_num; ++i) { 195 for (i = 0; i < pci_mmcfg_config_num; ++i) {
@@ -228,7 +197,8 @@ void __init pci_mmcfg_init(int type)
228 pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, 197 pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
229 MMCONFIG_APER_MAX); 198 MMCONFIG_APER_MAX);
230 if (!pci_mmcfg_virt[i].virt) { 199 if (!pci_mmcfg_virt[i].virt) {
231 printk("PCI: Cannot map mmconfig aperture for segment %d\n", 200 printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
201 "segment %d\n",
232 pci_mmcfg_config[i].pci_segment_group_number); 202 pci_mmcfg_config[i].pci_segment_group_number);
233 return; 203 return;
234 } 204 }
@@ -236,7 +206,6 @@ void __init pci_mmcfg_init(int type)
236 } 206 }
237 207
238 unreachable_devices(); 208 unreachable_devices();
239 pci_mmcfg_insert_resources();
240 209
241 raw_pci_ops = &pci_mmcfg; 210 raw_pci_ops = &pci_mmcfg;
242 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 211 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;