diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-04-11 12:51:40 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-04-11 12:51:40 -0400 |
commit | 10a5fd5e6b7e2d464c9f95f67cade4ddbd63f4e1 (patch) | |
tree | eddf856286234f28cac747d20eb59d918e1bc8b5 /arch/x86_64 | |
parent | c2a6585296009379e0f4eff39cdcb108b457ebf2 (diff) | |
parent | a145410dccdb44f81d3b56763ef9b6f721f4e47c (diff) |
Merge branch 'master'
Conflicts:
drivers/scsi/libata-scsi.c
include/linux/libata.h
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86_64/Makefile | 24 | ||||
-rw-r--r-- | arch/x86_64/boot/video.S | 5 | ||||
-rw-r--r-- | arch/x86_64/defconfig | 42 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 23 | ||||
-rw-r--r-- | arch/x86_64/kernel/aperture.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/e820.c | 36 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 28 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-dma.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/x8664_ksyms.c | 3 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 37 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 46 | ||||
-rw-r--r-- | arch/x86_64/mm/srat.c | 170 | ||||
-rw-r--r-- | arch/x86_64/pci/mmconfig.c | 53 |
21 files changed, 412 insertions, 109 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 4310b4a311a5..408d44a59756 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -136,6 +136,11 @@ config X86_L1_CACHE_SHIFT | |||
136 | default "7" if GENERIC_CPU || MPSC | 136 | default "7" if GENERIC_CPU || MPSC |
137 | default "6" if MK8 | 137 | default "6" if MK8 |
138 | 138 | ||
139 | config X86_INTERNODE_CACHE_BYTES | ||
140 | int | ||
141 | default "4096" if X86_VSMP | ||
142 | default X86_L1_CACHE_BYTES if !X86_VSMP | ||
143 | |||
139 | config X86_TSC | 144 | config X86_TSC |
140 | bool | 145 | bool |
141 | default y | 146 | default y |
@@ -283,6 +288,11 @@ config K8_NUMA | |||
283 | Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA | 288 | Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA |
284 | instead, which also takes priority if both are compiled in. | 289 | instead, which also takes priority if both are compiled in. |
285 | 290 | ||
291 | config NODES_SHIFT | ||
292 | int | ||
293 | default "6" | ||
294 | depends on NEED_MULTIPLE_NODES | ||
295 | |||
286 | # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. | 296 | # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. |
287 | 297 | ||
288 | config X86_64_ACPI_NUMA | 298 | config X86_64_ACPI_NUMA |
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 585fd4a559c8..e573e2ab5510 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile | |||
@@ -24,37 +24,37 @@ | |||
24 | LDFLAGS := -m elf_x86_64 | 24 | LDFLAGS := -m elf_x86_64 |
25 | OBJCOPYFLAGS := -O binary -R .note -R .comment -S | 25 | OBJCOPYFLAGS := -O binary -R .note -R .comment -S |
26 | LDFLAGS_vmlinux := | 26 | LDFLAGS_vmlinux := |
27 | |||
28 | CHECKFLAGS += -D__x86_64__ -m64 | 27 | CHECKFLAGS += -D__x86_64__ -m64 |
29 | 28 | ||
29 | cflags-y := | ||
30 | cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) | 30 | cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) |
31 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) | 31 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) |
32 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) | 32 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) |
33 | CFLAGS += $(cflags-y) | ||
34 | 33 | ||
35 | CFLAGS += -m64 | 34 | cflags-y += -m64 |
36 | CFLAGS += -mno-red-zone | 35 | cflags-y += -mno-red-zone |
37 | CFLAGS += -mcmodel=kernel | 36 | cflags-y += -mcmodel=kernel |
38 | CFLAGS += -pipe | 37 | cflags-y += -pipe |
39 | cflags-$(CONFIG_REORDER) += -ffunction-sections | 38 | cflags-$(CONFIG_REORDER) += -ffunction-sections |
40 | # this makes reading assembly source easier, but produces worse code | 39 | # this makes reading assembly source easier, but produces worse code |
41 | # actually it makes the kernel smaller too. | 40 | # actually it makes the kernel smaller too. |
42 | CFLAGS += -fno-reorder-blocks | 41 | cflags-y += -fno-reorder-blocks |
43 | CFLAGS += -Wno-sign-compare | 42 | cflags-y += -Wno-sign-compare |
44 | ifneq ($(CONFIG_UNWIND_INFO),y) | 43 | ifneq ($(CONFIG_UNWIND_INFO),y) |
45 | CFLAGS += -fno-asynchronous-unwind-tables | 44 | cflags-y += -fno-asynchronous-unwind-tables |
46 | endif | 45 | endif |
47 | ifneq ($(CONFIG_DEBUG_INFO),y) | 46 | ifneq ($(CONFIG_DEBUG_INFO),y) |
48 | # -fweb shrinks the kernel a bit, but the difference is very small | 47 | # -fweb shrinks the kernel a bit, but the difference is very small |
49 | # it also messes up debugging, so don't use it for now. | 48 | # it also messes up debugging, so don't use it for now. |
50 | #CFLAGS += $(call cc-option,-fweb) | 49 | #cflags-y += $(call cc-option,-fweb) |
51 | endif | 50 | endif |
52 | # -funit-at-a-time shrinks the kernel .text considerably | 51 | # -funit-at-a-time shrinks the kernel .text considerably |
53 | # unfortunately it makes reading oopses harder. | 52 | # unfortunately it makes reading oopses harder. |
54 | CFLAGS += $(call cc-option,-funit-at-a-time) | 53 | cflags-y += $(call cc-option,-funit-at-a-time) |
55 | # prevent gcc from generating any FP code by mistake | 54 | # prevent gcc from generating any FP code by mistake |
56 | CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) | 55 | cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) |
57 | 56 | ||
57 | CFLAGS += $(cflags-y) | ||
58 | AFLAGS += -m64 | 58 | AFLAGS += -m64 |
59 | 59 | ||
60 | head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o | 60 | head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o |
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S index 0587477c99f2..32327bb37aff 100644 --- a/arch/x86_64/boot/video.S +++ b/arch/x86_64/boot/video.S | |||
@@ -97,6 +97,7 @@ | |||
97 | #define PARAM_VESAPM_OFF 0x30 | 97 | #define PARAM_VESAPM_OFF 0x30 |
98 | #define PARAM_LFB_PAGES 0x32 | 98 | #define PARAM_LFB_PAGES 0x32 |
99 | #define PARAM_VESA_ATTRIB 0x34 | 99 | #define PARAM_VESA_ATTRIB 0x34 |
100 | #define PARAM_CAPABILITIES 0x36 | ||
100 | 101 | ||
101 | /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ | 102 | /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ |
102 | #ifdef CONFIG_VIDEO_RETAIN | 103 | #ifdef CONFIG_VIDEO_RETAIN |
@@ -233,6 +234,10 @@ mopar_gr: | |||
233 | movw 18(%di), %ax | 234 | movw 18(%di), %ax |
234 | movl %eax, %fs:(PARAM_LFB_SIZE) | 235 | movl %eax, %fs:(PARAM_LFB_SIZE) |
235 | 236 | ||
237 | # store mode capabilities | ||
238 | movl 10(%di), %eax | ||
239 | movl %eax, %fs:(PARAM_CAPABILITIES) | ||
240 | |||
236 | # switching the DAC to 8-bit is for <= 8 bpp only | 241 | # switching the DAC to 8-bit is for <= 8 bpp only |
237 | movw %fs:(PARAM_LFB_DEPTH), %ax | 242 | movw %fs:(PARAM_LFB_DEPTH), %ax |
238 | cmpw $8, %ax | 243 | cmpw $8, %ax |
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 566ecc97ee5a..3c45ec22b3fe 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.16-git9 | 3 | # Linux kernel version: 2.6.17-rc1 |
4 | # Sat Mar 25 15:18:40 2006 | 4 | # Mon Apr 3 16:11:14 2006 |
5 | # | 5 | # |
6 | CONFIG_X86_64=y | 6 | CONFIG_X86_64=y |
7 | CONFIG_64BIT=y | 7 | CONFIG_64BIT=y |
@@ -9,6 +9,7 @@ CONFIG_X86=y | |||
9 | CONFIG_SEMAPHORE_SLEEPERS=y | 9 | CONFIG_SEMAPHORE_SLEEPERS=y |
10 | CONFIG_MMU=y | 10 | CONFIG_MMU=y |
11 | CONFIG_RWSEM_GENERIC_SPINLOCK=y | 11 | CONFIG_RWSEM_GENERIC_SPINLOCK=y |
12 | CONFIG_GENERIC_HWEIGHT=y | ||
12 | CONFIG_GENERIC_CALIBRATE_DELAY=y | 13 | CONFIG_GENERIC_CALIBRATE_DELAY=y |
13 | CONFIG_X86_CMPXCHG=y | 14 | CONFIG_X86_CMPXCHG=y |
14 | CONFIG_EARLY_PRINTK=y | 15 | CONFIG_EARLY_PRINTK=y |
@@ -55,10 +56,6 @@ CONFIG_BASE_FULL=y | |||
55 | CONFIG_FUTEX=y | 56 | CONFIG_FUTEX=y |
56 | CONFIG_EPOLL=y | 57 | CONFIG_EPOLL=y |
57 | CONFIG_SHMEM=y | 58 | CONFIG_SHMEM=y |
58 | CONFIG_CC_ALIGN_FUNCTIONS=0 | ||
59 | CONFIG_CC_ALIGN_LABELS=0 | ||
60 | CONFIG_CC_ALIGN_LOOPS=0 | ||
61 | CONFIG_CC_ALIGN_JUMPS=0 | ||
62 | CONFIG_SLAB=y | 59 | CONFIG_SLAB=y |
63 | # CONFIG_TINY_SHMEM is not set | 60 | # CONFIG_TINY_SHMEM is not set |
64 | CONFIG_BASE_SMALL=0 | 61 | CONFIG_BASE_SMALL=0 |
@@ -70,7 +67,6 @@ CONFIG_BASE_SMALL=0 | |||
70 | CONFIG_MODULES=y | 67 | CONFIG_MODULES=y |
71 | CONFIG_MODULE_UNLOAD=y | 68 | CONFIG_MODULE_UNLOAD=y |
72 | CONFIG_MODULE_FORCE_UNLOAD=y | 69 | CONFIG_MODULE_FORCE_UNLOAD=y |
73 | CONFIG_OBSOLETE_MODPARM=y | ||
74 | # CONFIG_MODVERSIONS is not set | 70 | # CONFIG_MODVERSIONS is not set |
75 | # CONFIG_MODULE_SRCVERSION_ALL is not set | 71 | # CONFIG_MODULE_SRCVERSION_ALL is not set |
76 | # CONFIG_KMOD is not set | 72 | # CONFIG_KMOD is not set |
@@ -81,6 +77,7 @@ CONFIG_STOP_MACHINE=y | |||
81 | # | 77 | # |
82 | CONFIG_LBD=y | 78 | CONFIG_LBD=y |
83 | # CONFIG_BLK_DEV_IO_TRACE is not set | 79 | # CONFIG_BLK_DEV_IO_TRACE is not set |
80 | # CONFIG_LSF is not set | ||
84 | 81 | ||
85 | # | 82 | # |
86 | # IO Schedulers | 83 | # IO Schedulers |
@@ -105,6 +102,7 @@ CONFIG_X86_PC=y | |||
105 | CONFIG_GENERIC_CPU=y | 102 | CONFIG_GENERIC_CPU=y |
106 | CONFIG_X86_L1_CACHE_BYTES=128 | 103 | CONFIG_X86_L1_CACHE_BYTES=128 |
107 | CONFIG_X86_L1_CACHE_SHIFT=7 | 104 | CONFIG_X86_L1_CACHE_SHIFT=7 |
105 | CONFIG_X86_INTERNODE_CACHE_BYTES=128 | ||
108 | CONFIG_X86_TSC=y | 106 | CONFIG_X86_TSC=y |
109 | CONFIG_X86_GOOD_APIC=y | 107 | CONFIG_X86_GOOD_APIC=y |
110 | # CONFIG_MICROCODE is not set | 108 | # CONFIG_MICROCODE is not set |
@@ -116,6 +114,7 @@ CONFIG_X86_LOCAL_APIC=y | |||
116 | CONFIG_MTRR=y | 114 | CONFIG_MTRR=y |
117 | CONFIG_SMP=y | 115 | CONFIG_SMP=y |
118 | CONFIG_SCHED_SMT=y | 116 | CONFIG_SCHED_SMT=y |
117 | CONFIG_SCHED_MC=y | ||
119 | # CONFIG_PREEMPT_NONE is not set | 118 | # CONFIG_PREEMPT_NONE is not set |
120 | CONFIG_PREEMPT_VOLUNTARY=y | 119 | CONFIG_PREEMPT_VOLUNTARY=y |
121 | # CONFIG_PREEMPT is not set | 120 | # CONFIG_PREEMPT is not set |
@@ -138,6 +137,7 @@ CONFIG_NEED_MULTIPLE_NODES=y | |||
138 | CONFIG_SPLIT_PTLOCK_CPUS=4 | 137 | CONFIG_SPLIT_PTLOCK_CPUS=4 |
139 | CONFIG_MIGRATION=y | 138 | CONFIG_MIGRATION=y |
140 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y | 139 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y |
140 | CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y | ||
141 | CONFIG_NR_CPUS=32 | 141 | CONFIG_NR_CPUS=32 |
142 | CONFIG_HOTPLUG_CPU=y | 142 | CONFIG_HOTPLUG_CPU=y |
143 | CONFIG_HPET_TIMER=y | 143 | CONFIG_HPET_TIMER=y |
@@ -289,6 +289,7 @@ CONFIG_IP_PNP_DHCP=y | |||
289 | # CONFIG_INET_AH is not set | 289 | # CONFIG_INET_AH is not set |
290 | # CONFIG_INET_ESP is not set | 290 | # CONFIG_INET_ESP is not set |
291 | # CONFIG_INET_IPCOMP is not set | 291 | # CONFIG_INET_IPCOMP is not set |
292 | # CONFIG_INET_XFRM_TUNNEL is not set | ||
292 | # CONFIG_INET_TUNNEL is not set | 293 | # CONFIG_INET_TUNNEL is not set |
293 | CONFIG_INET_DIAG=y | 294 | CONFIG_INET_DIAG=y |
294 | CONFIG_INET_TCP_DIAG=y | 295 | CONFIG_INET_TCP_DIAG=y |
@@ -300,6 +301,7 @@ CONFIG_IPV6=y | |||
300 | # CONFIG_INET6_AH is not set | 301 | # CONFIG_INET6_AH is not set |
301 | # CONFIG_INET6_ESP is not set | 302 | # CONFIG_INET6_ESP is not set |
302 | # CONFIG_INET6_IPCOMP is not set | 303 | # CONFIG_INET6_IPCOMP is not set |
304 | # CONFIG_INET6_XFRM_TUNNEL is not set | ||
303 | # CONFIG_INET6_TUNNEL is not set | 305 | # CONFIG_INET6_TUNNEL is not set |
304 | # CONFIG_IPV6_TUNNEL is not set | 306 | # CONFIG_IPV6_TUNNEL is not set |
305 | # CONFIG_NETFILTER is not set | 307 | # CONFIG_NETFILTER is not set |
@@ -704,7 +706,6 @@ CONFIG_S2IO=m | |||
704 | # Wireless LAN (non-hamradio) | 706 | # Wireless LAN (non-hamradio) |
705 | # | 707 | # |
706 | # CONFIG_NET_RADIO is not set | 708 | # CONFIG_NET_RADIO is not set |
707 | # CONFIG_NET_WIRELESS_RTNETLINK is not set | ||
708 | 709 | ||
709 | # | 710 | # |
710 | # Wan interfaces | 711 | # Wan interfaces |
@@ -791,7 +792,7 @@ CONFIG_HW_CONSOLE=y | |||
791 | # | 792 | # |
792 | CONFIG_SERIAL_8250=y | 793 | CONFIG_SERIAL_8250=y |
793 | CONFIG_SERIAL_8250_CONSOLE=y | 794 | CONFIG_SERIAL_8250_CONSOLE=y |
794 | # CONFIG_SERIAL_8250_ACPI is not set | 795 | CONFIG_SERIAL_8250_PCI=y |
795 | CONFIG_SERIAL_8250_NR_UARTS=4 | 796 | CONFIG_SERIAL_8250_NR_UARTS=4 |
796 | CONFIG_SERIAL_8250_RUNTIME_UARTS=4 | 797 | CONFIG_SERIAL_8250_RUNTIME_UARTS=4 |
797 | # CONFIG_SERIAL_8250_EXTENDED is not set | 798 | # CONFIG_SERIAL_8250_EXTENDED is not set |
@@ -921,6 +922,7 @@ CONFIG_HWMON=y | |||
921 | # Digital Video Broadcasting Devices | 922 | # Digital Video Broadcasting Devices |
922 | # | 923 | # |
923 | # CONFIG_DVB is not set | 924 | # CONFIG_DVB is not set |
925 | # CONFIG_USB_DABUSB is not set | ||
924 | 926 | ||
925 | # | 927 | # |
926 | # Graphics support | 928 | # Graphics support |
@@ -932,6 +934,8 @@ CONFIG_VIDEO_SELECT=y | |||
932 | # Console display driver support | 934 | # Console display driver support |
933 | # | 935 | # |
934 | CONFIG_VGA_CONSOLE=y | 936 | CONFIG_VGA_CONSOLE=y |
937 | CONFIG_VGACON_SOFT_SCROLLBACK=y | ||
938 | CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 | ||
935 | CONFIG_DUMMY_CONSOLE=y | 939 | CONFIG_DUMMY_CONSOLE=y |
936 | 940 | ||
937 | # | 941 | # |
@@ -1058,15 +1062,6 @@ CONFIG_USB_HIDINPUT=y | |||
1058 | # CONFIG_USB_MICROTEK is not set | 1062 | # CONFIG_USB_MICROTEK is not set |
1059 | 1063 | ||
1060 | # | 1064 | # |
1061 | # USB Multimedia devices | ||
1062 | # | ||
1063 | # CONFIG_USB_DABUSB is not set | ||
1064 | |||
1065 | # | ||
1066 | # Video4Linux support is needed for USB Multimedia device support | ||
1067 | # | ||
1068 | |||
1069 | # | ||
1070 | # USB Network Adapters | 1065 | # USB Network Adapters |
1071 | # | 1066 | # |
1072 | # CONFIG_USB_CATC is not set | 1067 | # CONFIG_USB_CATC is not set |
@@ -1118,9 +1113,15 @@ CONFIG_USB_MON=y | |||
1118 | # CONFIG_MMC is not set | 1113 | # CONFIG_MMC is not set |
1119 | 1114 | ||
1120 | # | 1115 | # |
1116 | # LED devices | ||
1117 | # | ||
1118 | # CONFIG_NEW_LEDS is not set | ||
1119 | |||
1120 | # | ||
1121 | # InfiniBand support | 1121 | # InfiniBand support |
1122 | # | 1122 | # |
1123 | # CONFIG_INFINIBAND is not set | 1123 | # CONFIG_INFINIBAND is not set |
1124 | # CONFIG_IPATH_CORE is not set | ||
1124 | 1125 | ||
1125 | # | 1126 | # |
1126 | # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) | 1127 | # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) |
@@ -1128,6 +1129,11 @@ CONFIG_USB_MON=y | |||
1128 | # CONFIG_EDAC is not set | 1129 | # CONFIG_EDAC is not set |
1129 | 1130 | ||
1130 | # | 1131 | # |
1132 | # Real Time Clock | ||
1133 | # | ||
1134 | # CONFIG_RTC_CLASS is not set | ||
1135 | |||
1136 | # | ||
1131 | # Firmware Drivers | 1137 | # Firmware Drivers |
1132 | # | 1138 | # |
1133 | # CONFIG_EDD is not set | 1139 | # CONFIG_EDD is not set |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 35b2faccdc6c..5a9802676689 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <asm/vsyscall32.h> | 15 | #include <asm/vsyscall32.h> |
16 | #include <linux/linkage.h> | 16 | #include <linux/linkage.h> |
17 | 17 | ||
18 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | ||
19 | |||
18 | .macro IA32_ARG_FIXUP noebp=0 | 20 | .macro IA32_ARG_FIXUP noebp=0 |
19 | movl %edi,%r8d | 21 | movl %edi,%r8d |
20 | .if \noebp | 22 | .if \noebp |
@@ -109,8 +111,8 @@ ENTRY(ia32_sysenter_target) | |||
109 | CFI_REMEMBER_STATE | 111 | CFI_REMEMBER_STATE |
110 | jnz sysenter_tracesys | 112 | jnz sysenter_tracesys |
111 | sysenter_do_call: | 113 | sysenter_do_call: |
112 | cmpl $(IA32_NR_syscalls),%eax | 114 | cmpl $(IA32_NR_syscalls-1),%eax |
113 | jae ia32_badsys | 115 | ja ia32_badsys |
114 | IA32_ARG_FIXUP 1 | 116 | IA32_ARG_FIXUP 1 |
115 | call *ia32_sys_call_table(,%rax,8) | 117 | call *ia32_sys_call_table(,%rax,8) |
116 | movq %rax,RAX-ARGOFFSET(%rsp) | 118 | movq %rax,RAX-ARGOFFSET(%rsp) |
@@ -210,8 +212,8 @@ ENTRY(ia32_cstar_target) | |||
210 | CFI_REMEMBER_STATE | 212 | CFI_REMEMBER_STATE |
211 | jnz cstar_tracesys | 213 | jnz cstar_tracesys |
212 | cstar_do_call: | 214 | cstar_do_call: |
213 | cmpl $IA32_NR_syscalls,%eax | 215 | cmpl $IA32_NR_syscalls-1,%eax |
214 | jae ia32_badsys | 216 | ja ia32_badsys |
215 | IA32_ARG_FIXUP 1 | 217 | IA32_ARG_FIXUP 1 |
216 | call *ia32_sys_call_table(,%rax,8) | 218 | call *ia32_sys_call_table(,%rax,8) |
217 | movq %rax,RAX-ARGOFFSET(%rsp) | 219 | movq %rax,RAX-ARGOFFSET(%rsp) |
@@ -296,8 +298,8 @@ ENTRY(ia32_syscall) | |||
296 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | 298 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) |
297 | jnz ia32_tracesys | 299 | jnz ia32_tracesys |
298 | ia32_do_syscall: | 300 | ia32_do_syscall: |
299 | cmpl $(IA32_NR_syscalls),%eax | 301 | cmpl $(IA32_NR_syscalls-1),%eax |
300 | jae ia32_badsys | 302 | ja ia32_badsys |
301 | IA32_ARG_FIXUP | 303 | IA32_ARG_FIXUP |
302 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative | 304 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative |
303 | ia32_sysret: | 305 | ia32_sysret: |
@@ -685,12 +687,11 @@ ia32_sys_call_table: | |||
685 | .quad sys_readlinkat /* 305 */ | 687 | .quad sys_readlinkat /* 305 */ |
686 | .quad sys_fchmodat | 688 | .quad sys_fchmodat |
687 | .quad sys_faccessat | 689 | .quad sys_faccessat |
688 | .quad sys_ni_syscall /* pselect6 for now */ | 690 | .quad quiet_ni_syscall /* pselect6 for now */ |
689 | .quad sys_ni_syscall /* ppoll for now */ | 691 | .quad quiet_ni_syscall /* ppoll for now */ |
690 | .quad sys_unshare /* 310 */ | 692 | .quad sys_unshare /* 310 */ |
691 | .quad compat_sys_set_robust_list | 693 | .quad compat_sys_set_robust_list |
692 | .quad compat_sys_get_robust_list | 694 | .quad compat_sys_get_robust_list |
695 | .quad sys_splice | ||
696 | .quad sys_sync_file_range | ||
693 | ia32_syscall_end: | 697 | ia32_syscall_end: |
694 | .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 | ||
695 | .quad ni_syscall | ||
696 | .endr | ||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index fffd6b0a2fab..70b9d21ed675 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) | |||
80 | printk("Aperture from %s beyond 4GB. Ignoring.\n",name); | 80 | printk("Aperture from %s beyond 4GB. Ignoring.\n",name); |
81 | return 0; | 81 | return 0; |
82 | } | 82 | } |
83 | if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) { | 83 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { |
84 | printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); | 84 | printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); |
85 | return 0; | 85 | return 0; |
86 | } | 86 | } |
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 293cd71a266a..62776c07cff1 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -80,7 +80,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
82 | 82 | ||
83 | int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) | 83 | /* |
84 | * This function checks if any part of the range <start,end> is mapped | ||
85 | * with type. | ||
86 | */ | ||
87 | int __meminit | ||
88 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | ||
84 | { | 89 | { |
85 | int i; | 90 | int i; |
86 | for (i = 0; i < e820.nr_map; i++) { | 91 | for (i = 0; i < e820.nr_map; i++) { |
@@ -94,6 +99,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) | |||
94 | return 0; | 99 | return 0; |
95 | } | 100 | } |
96 | 101 | ||
102 | /* | ||
103 | * This function checks if the entire range <start,end> is mapped with type. | ||
104 | * | ||
105 | * Note: this function only works correctly if the e820 table is sorted and | ||
106 | * non-overlapping, which is the case | ||
107 | */ | ||
108 | int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) | ||
109 | { | ||
110 | int i; | ||
111 | for (i = 0; i < e820.nr_map; i++) { | ||
112 | struct e820entry *ei = &e820.map[i]; | ||
113 | if (type && ei->type != type) | ||
114 | continue; | ||
115 | /* is the region (part) in overlap with the current region ?*/ | ||
116 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
117 | continue; | ||
118 | |||
119 | /* if the region is at the beginning of <start,end> we move | ||
120 | * start to the end of the region since it's ok until there | ||
121 | */ | ||
122 | if (ei->addr <= start) | ||
123 | start = ei->addr + ei->size; | ||
124 | /* if start is now at or beyond end, we're done, full coverage */ | ||
125 | if (start >= end) | ||
126 | return 1; /* we're done */ | ||
127 | } | ||
128 | return 0; | ||
129 | } | ||
130 | |||
97 | /* | 131 | /* |
98 | * Find a free area in a specific range. | 132 | * Find a free area in a specific range. |
99 | */ | 133 | */ |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 8538bfea30e6..c946e4fe67a7 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -180,6 +180,10 @@ rff_trace: | |||
180 | * | 180 | * |
181 | * XXX if we had a free scratch register we could save the RSP into the stack frame | 181 | * XXX if we had a free scratch register we could save the RSP into the stack frame |
182 | * and report it properly in ps. Unfortunately we haven't. | 182 | * and report it properly in ps. Unfortunately we haven't. |
183 | * | ||
184 | * When the user can change the frames, always force IRET. That is because | ||
185 | * it deals with non-canonical addresses better. SYSRET has trouble | ||
186 | * with them due to bugs in both AMD and Intel CPUs. | ||
183 | */ | 187 | */ |
184 | 188 | ||
185 | ENTRY(system_call) | 189 | ENTRY(system_call) |
@@ -254,7 +258,10 @@ sysret_signal: | |||
254 | xorl %esi,%esi # oldset -> arg2 | 258 | xorl %esi,%esi # oldset -> arg2 |
255 | call ptregscall_common | 259 | call ptregscall_common |
256 | 1: movl $_TIF_NEED_RESCHED,%edi | 260 | 1: movl $_TIF_NEED_RESCHED,%edi |
257 | jmp sysret_check | 261 | /* Use IRET because user could have changed frame. This |
262 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | ||
263 | cli | ||
264 | jmp int_with_check | ||
258 | 265 | ||
259 | badsys: | 266 | badsys: |
260 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 267 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
@@ -280,7 +287,8 @@ tracesys: | |||
280 | call syscall_trace_leave | 287 | call syscall_trace_leave |
281 | RESTORE_TOP_OF_STACK %rbx | 288 | RESTORE_TOP_OF_STACK %rbx |
282 | RESTORE_REST | 289 | RESTORE_REST |
283 | jmp ret_from_sys_call | 290 | /* Use IRET because user could have changed frame */ |
291 | jmp int_ret_from_sys_call | ||
284 | CFI_ENDPROC | 292 | CFI_ENDPROC |
285 | 293 | ||
286 | /* | 294 | /* |
@@ -408,25 +416,9 @@ ENTRY(stub_execve) | |||
408 | CFI_ADJUST_CFA_OFFSET -8 | 416 | CFI_ADJUST_CFA_OFFSET -8 |
409 | CFI_REGISTER rip, r11 | 417 | CFI_REGISTER rip, r11 |
410 | SAVE_REST | 418 | SAVE_REST |
411 | movq %r11, %r15 | ||
412 | CFI_REGISTER rip, r15 | ||
413 | FIXUP_TOP_OF_STACK %r11 | 419 | FIXUP_TOP_OF_STACK %r11 |
414 | call sys_execve | 420 | call sys_execve |
415 | GET_THREAD_INFO(%rcx) | ||
416 | bt $TIF_IA32,threadinfo_flags(%rcx) | ||
417 | CFI_REMEMBER_STATE | ||
418 | jc exec_32bit | ||
419 | RESTORE_TOP_OF_STACK %r11 | 421 | RESTORE_TOP_OF_STACK %r11 |
420 | movq %r15, %r11 | ||
421 | CFI_REGISTER rip, r11 | ||
422 | RESTORE_REST | ||
423 | pushq %r11 | ||
424 | CFI_ADJUST_CFA_OFFSET 8 | ||
425 | CFI_REL_OFFSET rip, 0 | ||
426 | ret | ||
427 | |||
428 | exec_32bit: | ||
429 | CFI_RESTORE_STATE | ||
430 | movq %rax,RAX(%rsp) | 422 | movq %rax,RAX(%rsp) |
431 | RESTORE_REST | 423 | RESTORE_REST |
432 | jmp int_ret_from_sys_call | 424 | jmp int_ret_from_sys_call |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 10b3e348fc99..6f0790e8b6d3 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #define MISC_MCELOG_MINOR 227 | 29 | #define MISC_MCELOG_MINOR 227 |
30 | #define NR_BANKS 6 | 30 | #define NR_BANKS 6 |
31 | 31 | ||
32 | atomic_t mce_entry; | ||
33 | |||
32 | static int mce_dont_init; | 34 | static int mce_dont_init; |
33 | 35 | ||
34 | /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, | 36 | /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, |
@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
172 | int i; | 174 | int i; |
173 | int panicm_found = 0; | 175 | int panicm_found = 0; |
174 | 176 | ||
177 | atomic_inc(&mce_entry); | ||
178 | |||
175 | if (regs) | 179 | if (regs) |
176 | notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); | 180 | notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); |
177 | if (!banks) | 181 | if (!banks) |
178 | return; | 182 | goto out2; |
179 | 183 | ||
180 | memset(&m, 0, sizeof(struct mce)); | 184 | memset(&m, 0, sizeof(struct mce)); |
181 | m.cpu = safe_smp_processor_id(); | 185 | m.cpu = safe_smp_processor_id(); |
@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
266 | out: | 270 | out: |
267 | /* Last thing done in the machine check exception to clear state. */ | 271 | /* Last thing done in the machine check exception to clear state. */ |
268 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | 272 | wrmsrl(MSR_IA32_MCG_STATUS, 0); |
273 | out2: | ||
274 | atomic_dec(&mce_entry); | ||
269 | } | 275 | } |
270 | 276 | ||
271 | /* | 277 | /* |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index d9e4067faf05..4e6357fe0ec3 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
35 | #include <asm/kdebug.h> | 35 | #include <asm/kdebug.h> |
36 | #include <asm/local.h> | 36 | #include <asm/local.h> |
37 | #include <asm/mce.h> | ||
37 | 38 | ||
38 | /* | 39 | /* |
39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 40 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: |
@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
480 | __get_cpu_var(nmi_touch) = 0; | 481 | __get_cpu_var(nmi_touch) = 0; |
481 | touched = 1; | 482 | touched = 1; |
482 | } | 483 | } |
484 | #ifdef CONFIG_X86_MCE | ||
485 | /* Could check oops_in_progress here too, but it's safer | ||
486 | not to */ | ||
487 | if (atomic_read(&mce_entry) > 0) | ||
488 | touched = 1; | ||
489 | #endif | ||
483 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | 490 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { |
484 | /* | 491 | /* |
485 | * Ayiee, looks like this CPU is stuck ... | 492 | * Ayiee, looks like this CPU is stuck ... |
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 03c9eeedb0f3..af035ede70cd 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -48,9 +48,11 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | |||
48 | { | 48 | { |
49 | struct page *page; | 49 | struct page *page; |
50 | int node; | 50 | int node; |
51 | #ifdef CONFIG_PCI | ||
51 | if (dev->bus == &pci_bus_type) | 52 | if (dev->bus == &pci_bus_type) |
52 | node = pcibus_to_node(to_pci_dev(dev)->bus); | 53 | node = pcibus_to_node(to_pci_dev(dev)->bus); |
53 | else | 54 | else |
55 | #endif | ||
54 | node = numa_node_id(); | 56 | node = numa_node_id(); |
55 | page = alloc_pages_node(node, gfp, order); | 57 | page = alloc_pages_node(node, gfp, order); |
56 | return page ? page_address(page) : NULL; | 58 | return page ? page_address(page) : NULL; |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 70dd8e5c6889..1c44b53cb15b 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -781,10 +781,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
781 | } | 781 | } |
782 | case ARCH_GET_GS: { | 782 | case ARCH_GET_GS: { |
783 | unsigned long base; | 783 | unsigned long base; |
784 | unsigned gsindex; | ||
784 | if (task->thread.gsindex == GS_TLS_SEL) | 785 | if (task->thread.gsindex == GS_TLS_SEL) |
785 | base = read_32bit_tls(task, GS_TLS); | 786 | base = read_32bit_tls(task, GS_TLS); |
786 | else if (doit) | 787 | else if (doit) { |
787 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 788 | asm("movl %%gs,%0" : "=r" (gsindex)); |
789 | if (gsindex) | ||
790 | rdmsrl(MSR_KERNEL_GS_BASE, base); | ||
791 | else | ||
792 | base = task->thread.gs; | ||
793 | } | ||
788 | else | 794 | else |
789 | base = task->thread.gs; | 795 | base = task->thread.gs; |
790 | ret = put_user(base, (unsigned long __user *)addr); | 796 | ret = put_user(base, (unsigned long __user *)addr); |
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index d44b2c1e63a6..da8e7903d817 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -274,11 +274,6 @@ static int putreg(struct task_struct *child, | |||
274 | return -EIO; | 274 | return -EIO; |
275 | value &= 0xffff; | 275 | value &= 0xffff; |
276 | break; | 276 | break; |
277 | case offsetof(struct user_regs_struct, rip): | ||
278 | /* Check if the new RIP address is canonical */ | ||
279 | if (value >= TASK_SIZE_OF(child)) | ||
280 | return -EIO; | ||
281 | break; | ||
282 | } | 277 | } |
283 | put_stack_long(child, regno - sizeof(struct pt_regs), value); | 278 | put_stack_long(child, regno - sizeof(struct pt_regs), value); |
284 | return 0; | 279 | return 0; |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0856ad444f90..c50b06765a80 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -353,8 +353,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
353 | if (fullarg(from, "enable_timer_pin_1")) | 353 | if (fullarg(from, "enable_timer_pin_1")) |
354 | disable_timer_pin_1 = -1; | 354 | disable_timer_pin_1 = -1; |
355 | 355 | ||
356 | if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) | 356 | if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { |
357 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
357 | disable_apic = 1; | 358 | disable_apic = 1; |
359 | } | ||
358 | 360 | ||
359 | if (fullarg(from, "noapic")) | 361 | if (fullarg(from, "noapic")) |
360 | skip_ioapic_setup = 1; | 362 | skip_ioapic_setup = 1; |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index ef8bc46dc140..7392570f975d 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -726,7 +726,7 @@ static __init int late_hpet_init(void) | |||
726 | unsigned int ntimer; | 726 | unsigned int ntimer; |
727 | 727 | ||
728 | if (!vxtime.hpet_address) | 728 | if (!vxtime.hpet_address) |
729 | return -1; | 729 | return 0; |
730 | 730 | ||
731 | memset(&hd, 0, sizeof (hd)); | 731 | memset(&hd, 0, sizeof (hd)); |
732 | 732 | ||
@@ -917,6 +917,8 @@ void __init time_init(void) | |||
917 | vxtime.hpet_address = 0; | 917 | vxtime.hpet_address = 0; |
918 | 918 | ||
919 | if (hpet_use_timer) { | 919 | if (hpet_use_timer) { |
920 | /* set tick_nsec to use the proper rate for HPET */ | ||
921 | tick_nsec = TICK_NSEC_HPET; | ||
920 | cpu_khz = hpet_calibrate_tsc(); | 922 | cpu_khz = hpet_calibrate_tsc(); |
921 | timename = "HPET"; | 923 | timename = "HPET"; |
922 | #ifdef CONFIG_X86_PM_TIMER | 924 | #ifdef CONFIG_X86_PM_TIMER |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 39ff0708f803..b81f473c4a19 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -65,7 +65,7 @@ SECTIONS | |||
65 | .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | 65 | .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { |
66 | *(.data.cacheline_aligned) | 66 | *(.data.cacheline_aligned) |
67 | } | 67 | } |
68 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 68 | . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); |
69 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | 69 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { |
70 | *(.data.read_mostly) | 70 | *(.data.read_mostly) |
71 | } | 71 | } |
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index d78f46056bda..1def21c9f7cd 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -112,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); | |||
112 | #undef memcpy | 112 | #undef memcpy |
113 | #undef memset | 113 | #undef memset |
114 | #undef memmove | 114 | #undef memmove |
115 | #undef strlen | ||
116 | 115 | ||
117 | extern void * memset(void *,int,__kernel_size_t); | 116 | extern void * memset(void *,int,__kernel_size_t); |
118 | extern size_t strlen(const char *); | 117 | extern size_t strlen(const char *); |
@@ -121,8 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t); | |||
121 | extern void * __memcpy(void *,const void *,__kernel_size_t); | 120 | extern void * __memcpy(void *,const void *,__kernel_size_t); |
122 | 121 | ||
123 | EXPORT_SYMBOL(memset); | 122 | EXPORT_SYMBOL(memset); |
124 | EXPORT_SYMBOL(strlen); | ||
125 | EXPORT_SYMBOL(strpbrk); | ||
126 | EXPORT_SYMBOL(memmove); | 123 | EXPORT_SYMBOL(memmove); |
127 | EXPORT_SYMBOL(memcpy); | 124 | EXPORT_SYMBOL(memcpy); |
128 | EXPORT_SYMBOL(__memcpy); | 125 | EXPORT_SYMBOL(__memcpy); |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index e5f7f1c34462..4ba34e95d835 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -305,7 +305,7 @@ static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned | |||
305 | if (paddr >= end) | 305 | if (paddr >= end) |
306 | break; | 306 | break; |
307 | 307 | ||
308 | if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { | 308 | if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) { |
309 | set_pud(pud, __pud(0)); | 309 | set_pud(pud, __pud(0)); |
310 | continue; | 310 | continue; |
311 | } | 311 | } |
@@ -507,9 +507,8 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
507 | 507 | ||
508 | /* | 508 | /* |
509 | * Memory hotplug specific functions | 509 | * Memory hotplug specific functions |
510 | * These are only for non-NUMA machines right now. | ||
511 | */ | 510 | */ |
512 | #ifdef CONFIG_MEMORY_HOTPLUG | 511 | #if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE) |
513 | 512 | ||
514 | void online_page(struct page *page) | 513 | void online_page(struct page *page) |
515 | { | 514 | { |
@@ -520,6 +519,38 @@ void online_page(struct page *page) | |||
520 | num_physpages++; | 519 | num_physpages++; |
521 | } | 520 | } |
522 | 521 | ||
522 | #ifndef CONFIG_MEMORY_HOTPLUG | ||
523 | /* | ||
524 | * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, | ||
525 | * just online the pages. | ||
526 | */ | ||
527 | int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) | ||
528 | { | ||
529 | int err = -EIO; | ||
530 | unsigned long pfn; | ||
531 | unsigned long total = 0, mem = 0; | ||
532 | for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { | ||
533 | if (pfn_valid(pfn)) { | ||
534 | online_page(pfn_to_page(pfn)); | ||
535 | err = 0; | ||
536 | mem++; | ||
537 | } | ||
538 | total++; | ||
539 | } | ||
540 | if (!err) { | ||
541 | z->spanned_pages += total; | ||
542 | z->present_pages += mem; | ||
543 | z->zone_pgdat->node_spanned_pages += total; | ||
544 | z->zone_pgdat->node_present_pages += mem; | ||
545 | } | ||
546 | return err; | ||
547 | } | ||
548 | #endif | ||
549 | |||
550 | /* | ||
551 | * Memory is added always to NORMAL zone. This means you will never get | ||
552 | * additional DMA/DMA32 memory. | ||
553 | */ | ||
523 | int add_memory(u64 start, u64 size) | 554 | int add_memory(u64 start, u64 size) |
524 | { | 555 | { |
525 | struct pglist_data *pgdat = NODE_DATA(0); | 556 | struct pglist_data *pgdat = NODE_DATA(0); |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 4be82d6e2b48..cc02573a3271 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -100,11 +100,30 @@ int early_pfn_to_nid(unsigned long pfn) | |||
100 | } | 100 | } |
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | static void * __init | ||
104 | early_node_mem(int nodeid, unsigned long start, unsigned long end, | ||
105 | unsigned long size) | ||
106 | { | ||
107 | unsigned long mem = find_e820_area(start, end, size); | ||
108 | void *ptr; | ||
109 | if (mem != -1L) | ||
110 | return __va(mem); | ||
111 | ptr = __alloc_bootmem_nopanic(size, | ||
112 | SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)); | ||
113 | if (ptr == 0) { | ||
114 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
115 | size, nodeid); | ||
116 | return NULL; | ||
117 | } | ||
118 | return ptr; | ||
119 | } | ||
120 | |||
103 | /* Initialize bootmem allocator for a node */ | 121 | /* Initialize bootmem allocator for a node */ |
104 | void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 122 | void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
105 | { | 123 | { |
106 | unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; | 124 | unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; |
107 | unsigned long nodedata_phys; | 125 | unsigned long nodedata_phys; |
126 | void *bootmap; | ||
108 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 127 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
109 | 128 | ||
110 | start = round_up(start, ZONE_ALIGN); | 129 | start = round_up(start, ZONE_ALIGN); |
@@ -114,13 +133,11 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
114 | start_pfn = start >> PAGE_SHIFT; | 133 | start_pfn = start >> PAGE_SHIFT; |
115 | end_pfn = end >> PAGE_SHIFT; | 134 | end_pfn = end >> PAGE_SHIFT; |
116 | 135 | ||
117 | nodedata_phys = find_e820_area(start, end, pgdat_size); | 136 | node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size); |
118 | if (nodedata_phys == -1L) | 137 | if (node_data[nodeid] == NULL) |
119 | panic("Cannot find memory pgdat in node %d\n", nodeid); | 138 | return; |
120 | 139 | nodedata_phys = __pa(node_data[nodeid]); | |
121 | Dprintk("nodedata_phys %lx\n", nodedata_phys); | ||
122 | 140 | ||
123 | node_data[nodeid] = phys_to_virt(nodedata_phys); | ||
124 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 141 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
125 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 142 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; |
126 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 143 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
@@ -129,9 +146,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
129 | /* Find a place for the bootmem map */ | 146 | /* Find a place for the bootmem map */ |
130 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 147 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
131 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 148 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); |
132 | bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); | 149 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
133 | if (bootmap_start == -1L) | 150 | bootmap_pages<<PAGE_SHIFT); |
134 | panic("Not enough continuous space for bootmap on node %d", nodeid); | 151 | if (bootmap == NULL) { |
152 | if (nodedata_phys < start || nodedata_phys >= end) | ||
153 | free_bootmem((unsigned long)node_data[nodeid],pgdat_size); | ||
154 | node_data[nodeid] = NULL; | ||
155 | return; | ||
156 | } | ||
157 | bootmap_start = __pa(bootmap); | ||
135 | Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); | 158 | Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); |
136 | 159 | ||
137 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 160 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
@@ -142,6 +165,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
142 | 165 | ||
143 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); | 166 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); |
144 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); | 167 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); |
168 | #ifdef CONFIG_ACPI_NUMA | ||
169 | srat_reserve_add_area(nodeid); | ||
170 | #endif | ||
145 | node_set_online(nodeid); | 171 | node_set_online(nodeid); |
146 | } | 172 | } |
147 | 173 | ||
@@ -335,6 +361,8 @@ __init int numa_setup(char *opt) | |||
335 | #ifdef CONFIG_ACPI_NUMA | 361 | #ifdef CONFIG_ACPI_NUMA |
336 | if (!strncmp(opt,"noacpi",6)) | 362 | if (!strncmp(opt,"noacpi",6)) |
337 | acpi_numa = -1; | 363 | acpi_numa = -1; |
364 | if (!strncmp(opt,"hotadd=", 7)) | ||
365 | hotadd_percent = simple_strtoul(opt+7, NULL, 10); | ||
338 | #endif | 366 | #endif |
339 | return 1; | 367 | return 1; |
340 | } | 368 | } |
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 2eb879590dc4..15ae9fcd65a7 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c | |||
@@ -15,15 +15,26 @@ | |||
15 | #include <linux/bitmap.h> | 15 | #include <linux/bitmap.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/topology.h> | 17 | #include <linux/topology.h> |
18 | #include <linux/bootmem.h> | ||
19 | #include <linux/mm.h> | ||
18 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
19 | #include <asm/numa.h> | 21 | #include <asm/numa.h> |
20 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
21 | 23 | ||
24 | #if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ | ||
25 | defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \ | ||
26 | && !defined(CONFIG_MEMORY_HOTPLUG) | ||
27 | #define RESERVE_HOTADD 1 | ||
28 | #endif | ||
29 | |||
22 | static struct acpi_table_slit *acpi_slit; | 30 | static struct acpi_table_slit *acpi_slit; |
23 | 31 | ||
24 | static nodemask_t nodes_parsed __initdata; | 32 | static nodemask_t nodes_parsed __initdata; |
25 | static nodemask_t nodes_found __initdata; | 33 | static nodemask_t nodes_found __initdata; |
26 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 34 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
35 | static struct bootnode nodes_add[MAX_NUMNODES] __initdata; | ||
36 | static int found_add_area __initdata; | ||
37 | int hotadd_percent __initdata = 10; | ||
27 | static u8 pxm2node[256] = { [0 ... 255] = 0xff }; | 38 | static u8 pxm2node[256] = { [0 ... 255] = 0xff }; |
28 | 39 | ||
29 | /* Too small nodes confuse the VM badly. Usually they result | 40 | /* Too small nodes confuse the VM badly. Usually they result |
@@ -71,6 +82,10 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end) | |||
71 | static __init void cutoff_node(int i, unsigned long start, unsigned long end) | 82 | static __init void cutoff_node(int i, unsigned long start, unsigned long end) |
72 | { | 83 | { |
73 | struct bootnode *nd = &nodes[i]; | 84 | struct bootnode *nd = &nodes[i]; |
85 | |||
86 | if (found_add_area) | ||
87 | return; | ||
88 | |||
74 | if (nd->start < start) { | 89 | if (nd->start < start) { |
75 | nd->start = start; | 90 | nd->start = start; |
76 | if (nd->end < nd->start) | 91 | if (nd->end < nd->start) |
@@ -90,6 +105,8 @@ static __init void bad_srat(void) | |||
90 | acpi_numa = -1; | 105 | acpi_numa = -1; |
91 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 106 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
92 | apicid_to_node[i] = NUMA_NO_NODE; | 107 | apicid_to_node[i] = NUMA_NO_NODE; |
108 | for (i = 0; i < MAX_NUMNODES; i++) | ||
109 | nodes_add[i].start = nodes[i].end = 0; | ||
93 | } | 110 | } |
94 | 111 | ||
95 | static __init inline int srat_disabled(void) | 112 | static __init inline int srat_disabled(void) |
@@ -155,11 +172,114 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) | |||
155 | pxm, pa->apic_id, node); | 172 | pxm, pa->apic_id, node); |
156 | } | 173 | } |
157 | 174 | ||
175 | #ifdef RESERVE_HOTADD | ||
176 | /* | ||
177 | * Protect against too large hotadd areas that would fill up memory. | ||
178 | */ | ||
179 | static int hotadd_enough_memory(struct bootnode *nd) | ||
180 | { | ||
181 | static unsigned long allocated; | ||
182 | static unsigned long last_area_end; | ||
183 | unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT; | ||
184 | long mem = pages * sizeof(struct page); | ||
185 | unsigned long addr; | ||
186 | unsigned long allowed; | ||
187 | unsigned long oldpages = pages; | ||
188 | |||
189 | if (mem < 0) | ||
190 | return 0; | ||
191 | allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE; | ||
192 | allowed = (allowed / 100) * hotadd_percent; | ||
193 | if (allocated + mem > allowed) { | ||
194 | /* Give them at least part of their hotadd memory upto hotadd_percent | ||
195 | It would be better to spread the limit out | ||
196 | over multiple hotplug areas, but that is too complicated | ||
197 | right now */ | ||
198 | if (allocated >= allowed) | ||
199 | return 0; | ||
200 | pages = (allowed - allocated + mem) / sizeof(struct page); | ||
201 | mem = pages * sizeof(struct page); | ||
202 | nd->end = nd->start + pages*PAGE_SIZE; | ||
203 | } | ||
204 | /* Not completely fool proof, but a good sanity check */ | ||
205 | addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem); | ||
206 | if (addr == -1UL) | ||
207 | return 0; | ||
208 | if (pages != oldpages) | ||
209 | printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n", | ||
210 | pages << PAGE_SHIFT); | ||
211 | last_area_end = addr + mem; | ||
212 | allocated += mem; | ||
213 | return 1; | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * It is fine to add this area to the nodes data it will be used later | ||
218 | * This code supports one contigious hot add area per node. | ||
219 | */ | ||
220 | static int reserve_hotadd(int node, unsigned long start, unsigned long end) | ||
221 | { | ||
222 | unsigned long s_pfn = start >> PAGE_SHIFT; | ||
223 | unsigned long e_pfn = end >> PAGE_SHIFT; | ||
224 | int changed = 0; | ||
225 | struct bootnode *nd = &nodes_add[node]; | ||
226 | |||
227 | /* I had some trouble with strange memory hotadd regions breaking | ||
228 | the boot. Be very strict here and reject anything unexpected. | ||
229 | If you want working memory hotadd write correct SRATs. | ||
230 | |||
231 | The node size check is a basic sanity check to guard against | ||
232 | mistakes */ | ||
233 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | ||
234 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | ||
235 | return -1; | ||
236 | } | ||
237 | |||
238 | /* This check might be a bit too strict, but I'm keeping it for now. */ | ||
239 | if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) { | ||
240 | printk(KERN_ERR "SRAT: Hotplug area has existing memory\n"); | ||
241 | return -1; | ||
242 | } | ||
243 | |||
244 | if (!hotadd_enough_memory(&nodes_add[node])) { | ||
245 | printk(KERN_ERR "SRAT: Hotplug area too large\n"); | ||
246 | return -1; | ||
247 | } | ||
248 | |||
249 | /* Looks good */ | ||
250 | |||
251 | found_add_area = 1; | ||
252 | if (nd->start == nd->end) { | ||
253 | nd->start = start; | ||
254 | nd->end = end; | ||
255 | changed = 1; | ||
256 | } else { | ||
257 | if (nd->start == end) { | ||
258 | nd->start = start; | ||
259 | changed = 1; | ||
260 | } | ||
261 | if (nd->end == start) { | ||
262 | nd->end = end; | ||
263 | changed = 1; | ||
264 | } | ||
265 | if (!changed) | ||
266 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | ||
267 | } | ||
268 | |||
269 | if ((nd->end >> PAGE_SHIFT) > end_pfn) | ||
270 | end_pfn = nd->end >> PAGE_SHIFT; | ||
271 | |||
272 | if (changed) | ||
273 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); | ||
274 | return 0; | ||
275 | } | ||
276 | #endif | ||
277 | |||
158 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 278 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
159 | void __init | 279 | void __init |
160 | acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | 280 | acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) |
161 | { | 281 | { |
162 | struct bootnode *nd; | 282 | struct bootnode *nd, oldnode; |
163 | unsigned long start, end; | 283 | unsigned long start, end; |
164 | int node, pxm; | 284 | int node, pxm; |
165 | int i; | 285 | int i; |
@@ -172,6 +292,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
172 | } | 292 | } |
173 | if (ma->flags.enabled == 0) | 293 | if (ma->flags.enabled == 0) |
174 | return; | 294 | return; |
295 | if (ma->flags.hot_pluggable && hotadd_percent == 0) | ||
296 | return; | ||
175 | start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); | 297 | start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); |
176 | end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); | 298 | end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); |
177 | pxm = ma->proximity_domain; | 299 | pxm = ma->proximity_domain; |
@@ -181,10 +303,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
181 | bad_srat(); | 303 | bad_srat(); |
182 | return; | 304 | return; |
183 | } | 305 | } |
184 | /* It is fine to add this area to the nodes data it will be used later*/ | ||
185 | if (ma->flags.hot_pluggable == 1) | ||
186 | printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n", | ||
187 | start, end); | ||
188 | i = conflicting_nodes(start, end); | 306 | i = conflicting_nodes(start, end); |
189 | if (i == node) { | 307 | if (i == node) { |
190 | printk(KERN_WARNING | 308 | printk(KERN_WARNING |
@@ -199,6 +317,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
199 | return; | 317 | return; |
200 | } | 318 | } |
201 | nd = &nodes[node]; | 319 | nd = &nodes[node]; |
320 | oldnode = *nd; | ||
202 | if (!node_test_and_set(node, nodes_parsed)) { | 321 | if (!node_test_and_set(node, nodes_parsed)) { |
203 | nd->start = start; | 322 | nd->start = start; |
204 | nd->end = end; | 323 | nd->end = end; |
@@ -208,8 +327,19 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
208 | if (nd->end < end) | 327 | if (nd->end < end) |
209 | nd->end = end; | 328 | nd->end = end; |
210 | } | 329 | } |
330 | |||
211 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | 331 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, |
212 | nd->start, nd->end); | 332 | nd->start, nd->end); |
333 | |||
334 | #ifdef RESERVE_HOTADD | ||
335 | if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) { | ||
336 | /* Ignore hotadd region. Undo damage */ | ||
337 | printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); | ||
338 | *nd = oldnode; | ||
339 | if ((nd->start | nd->end) == 0) | ||
340 | node_clear(node, nodes_parsed); | ||
341 | } | ||
342 | #endif | ||
213 | } | 343 | } |
214 | 344 | ||
215 | /* Sanity check to catch more bad SRATs (they are amazingly common). | 345 | /* Sanity check to catch more bad SRATs (they are amazingly common). |
@@ -225,6 +355,9 @@ static int nodes_cover_memory(void) | |||
225 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | 355 | unsigned long e = nodes[i].end >> PAGE_SHIFT; |
226 | pxmram += e - s; | 356 | pxmram += e - s; |
227 | pxmram -= e820_hole_size(s, e); | 357 | pxmram -= e820_hole_size(s, e); |
358 | pxmram -= nodes_add[i].end - nodes_add[i].start; | ||
359 | if ((long)pxmram < 0) | ||
360 | pxmram = 0; | ||
228 | } | 361 | } |
229 | 362 | ||
230 | e820ram = end_pfn - e820_hole_size(0, end_pfn); | 363 | e820ram = end_pfn - e820_hole_size(0, end_pfn); |
@@ -258,7 +391,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
258 | 391 | ||
259 | /* First clean up the node list */ | 392 | /* First clean up the node list */ |
260 | for (i = 0; i < MAX_NUMNODES; i++) { | 393 | for (i = 0; i < MAX_NUMNODES; i++) { |
261 | cutoff_node(i, start, end); | 394 | cutoff_node(i, start, end); |
262 | if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) | 395 | if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) |
263 | unparse_node(i); | 396 | unparse_node(i); |
264 | } | 397 | } |
@@ -282,6 +415,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
282 | /* Finally register nodes */ | 415 | /* Finally register nodes */ |
283 | for_each_node_mask(i, nodes_parsed) | 416 | for_each_node_mask(i, nodes_parsed) |
284 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 417 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
418 | /* Try again in case setup_node_bootmem missed one due | ||
419 | to missing bootmem */ | ||
420 | for_each_node_mask(i, nodes_parsed) | ||
421 | if (!node_online(i)) | ||
422 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | ||
423 | |||
285 | for (i = 0; i < NR_CPUS; i++) { | 424 | for (i = 0; i < NR_CPUS; i++) { |
286 | if (cpu_to_node[i] == NUMA_NO_NODE) | 425 | if (cpu_to_node[i] == NUMA_NO_NODE) |
287 | continue; | 426 | continue; |
@@ -303,6 +442,25 @@ static int node_to_pxm(int n) | |||
303 | return 0; | 442 | return 0; |
304 | } | 443 | } |
305 | 444 | ||
445 | void __init srat_reserve_add_area(int nodeid) | ||
446 | { | ||
447 | if (found_add_area && nodes_add[nodeid].end) { | ||
448 | u64 total_mb; | ||
449 | |||
450 | printk(KERN_INFO "SRAT: Reserving hot-add memory space " | ||
451 | "for node %d at %Lx-%Lx\n", | ||
452 | nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); | ||
453 | total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) | ||
454 | >> PAGE_SHIFT; | ||
455 | total_mb *= sizeof(struct page); | ||
456 | total_mb >>= 20; | ||
457 | printk(KERN_INFO "SRAT: This will cost you %Lu MB of " | ||
458 | "pre-allocated memory.\n", (unsigned long long)total_mb); | ||
459 | reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, | ||
460 | nodes_add[nodeid].end - nodes_add[nodeid].start); | ||
461 | } | ||
462 | } | ||
463 | |||
306 | int __node_distance(int a, int b) | 464 | int __node_distance(int a, int b) |
307 | { | 465 | { |
308 | int index; | 466 | int index; |
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index e616500207e4..a2060e4d5de6 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c | |||
@@ -9,11 +9,16 @@ | |||
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/acpi.h> | 10 | #include <linux/acpi.h> |
11 | #include <linux/bitmap.h> | 11 | #include <linux/bitmap.h> |
12 | #include <asm/e820.h> | ||
13 | |||
12 | #include "pci.h" | 14 | #include "pci.h" |
13 | 15 | ||
14 | #define MMCONFIG_APER_SIZE (256*1024*1024) | 16 | #define MMCONFIG_APER_SIZE (256*1024*1024) |
17 | /* Verify the first 16 busses. We assume that systems with more busses | ||
18 | get MCFG right. */ | ||
19 | #define MAX_CHECK_BUS 16 | ||
15 | 20 | ||
16 | static DECLARE_BITMAP(fallback_slots, 32); | 21 | static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS); |
17 | 22 | ||
18 | /* Static virtual mapping of the MMCONFIG aperture */ | 23 | /* Static virtual mapping of the MMCONFIG aperture */ |
19 | struct mmcfg_virt { | 24 | struct mmcfg_virt { |
@@ -55,7 +60,8 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) | |||
55 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) | 60 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) |
56 | { | 61 | { |
57 | char __iomem *addr; | 62 | char __iomem *addr; |
58 | if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), fallback_slots)) | 63 | if (seg == 0 && bus < MAX_CHECK_BUS && |
64 | test_bit(32*bus + PCI_SLOT(devfn), fallback_slots)) | ||
59 | return NULL; | 65 | return NULL; |
60 | addr = get_virt(seg, bus); | 66 | addr = get_virt(seg, bus); |
61 | if (!addr) | 67 | if (!addr) |
@@ -69,8 +75,10 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, | |||
69 | char __iomem *addr; | 75 | char __iomem *addr; |
70 | 76 | ||
71 | /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ | 77 | /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ |
72 | if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) | 78 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { |
79 | *value = -1; | ||
73 | return -EINVAL; | 80 | return -EINVAL; |
81 | } | ||
74 | 82 | ||
75 | addr = pci_dev_base(seg, bus, devfn); | 83 | addr = pci_dev_base(seg, bus, devfn); |
76 | if (!addr) | 84 | if (!addr) |
@@ -129,21 +137,26 @@ static struct pci_raw_ops pci_mmcfg = { | |||
129 | Normally this can be expressed in the MCFG by not listing them | 137 | Normally this can be expressed in the MCFG by not listing them |
130 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. | 138 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. |
131 | Instead try to discover all devices on bus 0 that are unreachable using MM | 139 | Instead try to discover all devices on bus 0 that are unreachable using MM |
132 | and fallback for them. | 140 | and fallback for them. */ |
133 | We only do this for bus 0/seg 0 */ | ||
134 | static __init void unreachable_devices(void) | 141 | static __init void unreachable_devices(void) |
135 | { | 142 | { |
136 | int i; | 143 | int i, k; |
137 | for (i = 0; i < 32; i++) { | 144 | /* Use the max bus number from ACPI here? */ |
138 | u32 val1; | 145 | for (k = 0; k < MAX_CHECK_BUS; k++) { |
139 | char __iomem *addr; | 146 | for (i = 0; i < 32; i++) { |
140 | 147 | u32 val1; | |
141 | pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); | 148 | char __iomem *addr; |
142 | if (val1 == 0xffffffff) | 149 | |
143 | continue; | 150 | pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1); |
144 | addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); | 151 | if (val1 == 0xffffffff) |
145 | if (addr == NULL|| readl(addr) != val1) { | 152 | continue; |
146 | set_bit(i, fallback_slots); | 153 | addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); |
154 | if (addr == NULL|| readl(addr) != val1) { | ||
155 | set_bit(i + 32*k, fallback_slots); | ||
156 | printk(KERN_NOTICE | ||
157 | "PCI: No mmconfig possible on device %x:%x\n", | ||
158 | k, i); | ||
159 | } | ||
147 | } | 160 | } |
148 | } | 161 | } |
149 | } | 162 | } |
@@ -161,6 +174,14 @@ void __init pci_mmcfg_init(void) | |||
161 | (pci_mmcfg_config[0].base_address == 0)) | 174 | (pci_mmcfg_config[0].base_address == 0)) |
162 | return; | 175 | return; |
163 | 176 | ||
177 | if (!e820_all_mapped(pci_mmcfg_config[0].base_address, | ||
178 | pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE, | ||
179 | E820_RESERVED)) { | ||
180 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n"); | ||
181 | printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); | ||
182 | return; | ||
183 | } | ||
184 | |||
164 | /* RED-PEN i386 doesn't do _nocache right now */ | 185 | /* RED-PEN i386 doesn't do _nocache right now */ |
165 | pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); | 186 | pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); |
166 | if (pci_mmcfg_virt == NULL) { | 187 | if (pci_mmcfg_virt == NULL) { |