aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/defconfig19
-rw-r--r--arch/x86_64/ia32/Makefile4
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c4
-rw-r--r--arch/x86_64/ia32/ia32entry.S2
-rw-r--r--arch/x86_64/kernel/Makefile4
-rw-r--r--arch/x86_64/kernel/e820.c8
-rw-r--r--arch/x86_64/kernel/entry.S7
-rw-r--r--arch/x86_64/kernel/io_apic.c35
-rw-r--r--arch/x86_64/kernel/kprobes.c16
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c2
-rw-r--r--arch/x86_64/kernel/mpparse.c12
-rw-r--r--arch/x86_64/kernel/pci-dma.c4
-rw-r--r--arch/x86_64/kernel/pci-gart.c18
-rw-r--r--arch/x86_64/kernel/pci-nommu.c7
-rw-r--r--arch/x86_64/kernel/pmtimer.c2
-rw-r--r--arch/x86_64/kernel/process.c4
-rw-r--r--arch/x86_64/kernel/ptrace.c6
-rw-r--r--arch/x86_64/kernel/setup.c55
-rw-r--r--arch/x86_64/kernel/traps.c38
-rw-r--r--arch/x86_64/mm/numa.c2
-rw-r--r--arch/x86_64/mm/srat.c19
22 files changed, 199 insertions, 71 deletions
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 3c45ec22b3fe..69db0c0721d1 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.17-rc1 3# Linux kernel version: 2.6.17-rc1-git11
4# Mon Apr 3 16:11:14 2006 4# Sun Apr 16 07:22:36 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -57,6 +57,7 @@ CONFIG_FUTEX=y
57CONFIG_EPOLL=y 57CONFIG_EPOLL=y
58CONFIG_SHMEM=y 58CONFIG_SHMEM=y
59CONFIG_SLAB=y 59CONFIG_SLAB=y
60CONFIG_DOUBLEFAULT=y
60# CONFIG_TINY_SHMEM is not set 61# CONFIG_TINY_SHMEM is not set
61CONFIG_BASE_SMALL=0 62CONFIG_BASE_SMALL=0
62# CONFIG_SLOB is not set 63# CONFIG_SLOB is not set
@@ -121,6 +122,7 @@ CONFIG_PREEMPT_VOLUNTARY=y
121CONFIG_PREEMPT_BKL=y 122CONFIG_PREEMPT_BKL=y
122CONFIG_NUMA=y 123CONFIG_NUMA=y
123CONFIG_K8_NUMA=y 124CONFIG_K8_NUMA=y
125CONFIG_NODES_SHIFT=6
124CONFIG_X86_64_ACPI_NUMA=y 126CONFIG_X86_64_ACPI_NUMA=y
125CONFIG_NUMA_EMU=y 127CONFIG_NUMA_EMU=y
126CONFIG_ARCH_DISCONTIGMEM_ENABLE=y 128CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
@@ -544,7 +546,6 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
544# CONFIG_SCSI_INIA100 is not set 546# CONFIG_SCSI_INIA100 is not set
545# CONFIG_SCSI_SYM53C8XX_2 is not set 547# CONFIG_SCSI_SYM53C8XX_2 is not set
546# CONFIG_SCSI_IPR is not set 548# CONFIG_SCSI_IPR is not set
547# CONFIG_SCSI_QLOGIC_FC is not set
548# CONFIG_SCSI_QLOGIC_1280 is not set 549# CONFIG_SCSI_QLOGIC_1280 is not set
549# CONFIG_SCSI_QLA_FC is not set 550# CONFIG_SCSI_QLA_FC is not set
550# CONFIG_SCSI_LPFC is not set 551# CONFIG_SCSI_LPFC is not set
@@ -1045,9 +1046,7 @@ CONFIG_USB_HIDINPUT=y
1045# CONFIG_USB_ACECAD is not set 1046# CONFIG_USB_ACECAD is not set
1046# CONFIG_USB_KBTAB is not set 1047# CONFIG_USB_KBTAB is not set
1047# CONFIG_USB_POWERMATE is not set 1048# CONFIG_USB_POWERMATE is not set
1048# CONFIG_USB_MTOUCH is not set 1049# CONFIG_USB_TOUCHSCREEN is not set
1049# CONFIG_USB_ITMTOUCH is not set
1050# CONFIG_USB_EGALAX is not set
1051# CONFIG_USB_YEALINK is not set 1050# CONFIG_USB_YEALINK is not set
1052# CONFIG_USB_XPAD is not set 1051# CONFIG_USB_XPAD is not set
1053# CONFIG_USB_ATI_REMOTE is not set 1052# CONFIG_USB_ATI_REMOTE is not set
@@ -1118,6 +1117,14 @@ CONFIG_USB_MON=y
1118# CONFIG_NEW_LEDS is not set 1117# CONFIG_NEW_LEDS is not set
1119 1118
1120# 1119#
1120# LED drivers
1121#
1122
1123#
1124# LED Triggers
1125#
1126
1127#
1121# InfiniBand support 1128# InfiniBand support
1122# 1129#
1123# CONFIG_INFINIBAND is not set 1130# CONFIG_INFINIBAND is not set
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
index 929e6b0771f8..e9263b4975e0 100644
--- a/arch/x86_64/ia32/Makefile
+++ b/arch/x86_64/ia32/Makefile
@@ -27,5 +27,5 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
27$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE 27$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
28 $(call if_changed,syscall) 28 $(call if_changed,syscall)
29 29
30AFLAGS_vsyscall-sysenter.o = -m32 30AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
31AFLAGS_vsyscall-syscall.o = -m32 31AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index e776139afb20..926c4743d13b 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -339,7 +339,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
339 struct mm_struct *mm = current->mm; 339 struct mm_struct *mm = current->mm;
340 int i, ret; 340 int i, ret;
341 341
342 stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; 342 stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
343 mm->arg_start = bprm->p + stack_base; 343 mm->arg_start = bprm->p + stack_base;
344 344
345 bprm->p += stack_base; 345 bprm->p += stack_base;
@@ -357,7 +357,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
357 { 357 {
358 mpnt->vm_mm = mm; 358 mpnt->vm_mm = mm;
359 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; 359 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
360 mpnt->vm_end = IA32_STACK_TOP; 360 mpnt->vm_end = stack_top;
361 if (executable_stack == EXSTACK_ENABLE_X) 361 if (executable_stack == EXSTACK_ENABLE_X)
362 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; 362 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
363 else if (executable_stack == EXSTACK_DISABLE_X) 363 else if (executable_stack == EXSTACK_DISABLE_X)
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 5a9802676689..5a92fed2d1d5 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -694,4 +694,6 @@ ia32_sys_call_table:
694 .quad compat_sys_get_robust_list 694 .quad compat_sys_get_robust_list
695 .quad sys_splice 695 .quad sys_splice
696 .quad sys_sync_file_range 696 .quad sys_sync_file_range
697 .quad sys_tee
698 .quad compat_sys_vmsplice
697ia32_syscall_end: 699ia32_syscall_end:
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index a098a11e7755..059c88313f4e 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 dmi_scan.o pci-dma.o pci-nommu.o 11 pci-dma.o pci-nommu.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -49,5 +49,3 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
49quirks-y += ../../i386/kernel/quirks.o 49quirks-y += ../../i386/kernel/quirks.o
50i8237-y += ../../i386/kernel/i8237.o 50i8237-y += ../../i386/kernel/i8237.o
51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
52dmi_scan-y += ../../i386/kernel/dmi_scan.o
53
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 62776c07cff1..1ef6028f721e 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -76,6 +76,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
76 *addrp = __pa_symbol(&_end); 76 *addrp = __pa_symbol(&_end);
77 return 1; 77 return 1;
78 } 78 }
79
80 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
81 *addrp = ebda_addr + ebda_size;
82 return 1;
83 }
84
79 /* XXX ramdisk image here? */ 85 /* XXX ramdisk image here? */
80 return 0; 86 return 0;
81} 87}
@@ -143,7 +149,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
143 addr = start; 149 addr = start;
144 if (addr > ei->addr + ei->size) 150 if (addr > ei->addr + ei->size)
145 continue; 151 continue;
146 while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size) 152 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
147 ; 153 ;
148 last = addr + size; 154 last = addr + size;
149 if (last > ei->addr + ei->size) 155 if (last > ei->addr + ei->size)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index c946e4fe67a7..586b34c00c48 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -281,12 +281,7 @@ tracesys:
281 ja 1f 281 ja 1f
282 movq %r10,%rcx /* fixup for C */ 282 movq %r10,%rcx /* fixup for C */
283 call *sys_call_table(,%rax,8) 283 call *sys_call_table(,%rax,8)
284 movq %rax,RAX-ARGOFFSET(%rsp) 2841: movq %rax,RAX-ARGOFFSET(%rsp)
2851: SAVE_REST
286 movq %rsp,%rdi
287 call syscall_trace_leave
288 RESTORE_TOP_OF_STACK %rbx
289 RESTORE_REST
290 /* Use IRET because user could have changed frame */ 285 /* Use IRET because user could have changed frame */
291 jmp int_ret_from_sys_call 286 jmp int_ret_from_sys_call
292 CFI_ENDPROC 287 CFI_ENDPROC
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 77b4c608cca0..9cc7031b7151 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -271,6 +271,18 @@ __setup("enable_8254_timer", setup_enable_8254_timer);
271#include <linux/pci_ids.h> 271#include <linux/pci_ids.h>
272#include <linux/pci.h> 272#include <linux/pci.h>
273 273
274
275#ifdef CONFIG_ACPI
276
277static int nvidia_hpet_detected __initdata;
278
279static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
280{
281 nvidia_hpet_detected = 1;
282 return 0;
283}
284#endif
285
274/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC 286/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
275 off. Check for an Nvidia or VIA PCI bridge and turn it off. 287 off. Check for an Nvidia or VIA PCI bridge and turn it off.
276 Use pci direct infrastructure because this runs before the PCI subsystem. 288 Use pci direct infrastructure because this runs before the PCI subsystem.
@@ -317,11 +329,19 @@ void __init check_ioapic(void)
317 return; 329 return;
318 case PCI_VENDOR_ID_NVIDIA: 330 case PCI_VENDOR_ID_NVIDIA:
319#ifdef CONFIG_ACPI 331#ifdef CONFIG_ACPI
320 /* All timer overrides on Nvidia 332 /*
321 seem to be wrong. Skip them. */ 333 * All timer overrides on Nvidia are
322 acpi_skip_timer_override = 1; 334 * wrong unless HPET is enabled.
323 printk(KERN_INFO 335 */
324 "Nvidia board detected. Ignoring ACPI timer override.\n"); 336 nvidia_hpet_detected = 0;
337 acpi_table_parse(ACPI_HPET,
338 nvidia_hpet_check);
339 if (nvidia_hpet_detected == 0) {
340 acpi_skip_timer_override = 1;
341 printk(KERN_INFO "Nvidia board "
342 "detected. Ignoring ACPI "
343 "timer override.\n");
344 }
325#endif 345#endif
326 /* RED-PEN skip them on mptables too? */ 346 /* RED-PEN skip them on mptables too? */
327 return; 347 return;
@@ -1777,6 +1797,8 @@ static inline void unlock_ExtINT_logic(void)
1777 spin_unlock_irqrestore(&ioapic_lock, flags); 1797 spin_unlock_irqrestore(&ioapic_lock, flags);
1778} 1798}
1779 1799
1800int timer_uses_ioapic_pin_0;
1801
1780/* 1802/*
1781 * This code may look a bit paranoid, but it's supposed to cooperate with 1803 * This code may look a bit paranoid, but it's supposed to cooperate with
1782 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 1804 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -1814,6 +1836,9 @@ static inline void check_timer(void)
1814 pin2 = ioapic_i8259.pin; 1836 pin2 = ioapic_i8259.pin;
1815 apic2 = ioapic_i8259.apic; 1837 apic2 = ioapic_i8259.apic;
1816 1838
1839 if (pin1 == 0)
1840 timer_uses_ioapic_pin_0 = 1;
1841
1817 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1842 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1818 vector, apic1, pin1, apic2, pin2); 1843 vector, apic1, pin1, apic2, pin2);
1819 1844
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index accbff3fec49..fa1d19ca700a 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -53,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
53/* 53/*
54 * returns non-zero if opcode modifies the interrupt flag. 54 * returns non-zero if opcode modifies the interrupt flag.
55 */ 55 */
56static inline int is_IF_modifier(kprobe_opcode_t *insn) 56static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
57{ 57{
58 switch (*insn) { 58 switch (*insn) {
59 case 0xfa: /* cli */ 59 case 0xfa: /* cli */
@@ -84,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
84 * If it does, return the address of the 32-bit displacement word. 84 * If it does, return the address of the 32-bit displacement word.
85 * If not, return null. 85 * If not, return null.
86 */ 86 */
87static inline s32 *is_riprel(u8 *insn) 87static s32 __kprobes *is_riprel(u8 *insn)
88{ 88{
89#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ 89#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
90 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 90 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -229,7 +229,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
229 mutex_unlock(&kprobe_mutex); 229 mutex_unlock(&kprobe_mutex);
230} 230}
231 231
232static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) 232static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
233{ 233{
234 kcb->prev_kprobe.kp = kprobe_running(); 234 kcb->prev_kprobe.kp = kprobe_running();
235 kcb->prev_kprobe.status = kcb->kprobe_status; 235 kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -237,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
237 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; 237 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
238} 238}
239 239
240static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) 240static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
241{ 241{
242 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 242 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
243 kcb->kprobe_status = kcb->prev_kprobe.status; 243 kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -245,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
245 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; 245 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
246} 246}
247 247
248static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 248static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
249 struct kprobe_ctlblk *kcb) 249 struct kprobe_ctlblk *kcb)
250{ 250{
251 __get_cpu_var(current_kprobe) = p; 251 __get_cpu_var(current_kprobe) = p;
@@ -514,13 +514,13 @@ static void __kprobes resume_execution(struct kprobe *p,
514 *tos = orig_rip + (*tos - copy_rip); 514 *tos = orig_rip + (*tos - copy_rip);
515 break; 515 break;
516 case 0xff: 516 case 0xff:
517 if ((*insn & 0x30) == 0x10) { 517 if ((insn[1] & 0x30) == 0x10) {
518 /* call absolute, indirect */ 518 /* call absolute, indirect */
519 /* Fix return addr; rip is correct. */ 519 /* Fix return addr; rip is correct. */
520 next_rip = regs->rip; 520 next_rip = regs->rip;
521 *tos = orig_rip + (*tos - copy_rip); 521 *tos = orig_rip + (*tos - copy_rip);
522 } else if (((*insn & 0x31) == 0x20) || /* jmp near, absolute indirect */ 522 } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
523 ((*insn & 0x31) == 0x21)) { /* jmp far, absolute indirect */ 523 ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
524 /* rip is correct. */ 524 /* rip is correct. */
525 next_rip = regs->rip; 525 next_rip = regs->rip;
526 } 526 }
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 6f0790e8b6d3..c69fc43cee7b 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -629,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
629#endif 629#endif
630 630
631/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 631/* Get notified when a cpu comes on/off. Be hotplug friendly. */
632static __cpuinit int 632static int
633mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 633mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
634{ 634{
635 unsigned int cpu = (unsigned long)hcpu; 635 unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d3ad7d81266d..d13b241ad094 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu)
482#endif 482#endif
483 483
484/* get notified when a cpu comes on/off */ 484/* get notified when a cpu comes on/off */
485static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, 485static int threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu) 486 unsigned long action, void *hcpu)
487{ 487{
488 /* cpu was unsigned int to begin with */ 488 /* cpu was unsigned int to begin with */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b17cf3eba359..083da7e606b1 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -968,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
968 */ 968 */
969 int irq = gsi; 969 int irq = gsi;
970 if (gsi < MAX_GSI_NUM) { 970 if (gsi < MAX_GSI_NUM) {
971 if (gsi > 15) 971 /*
972 * Retain the VIA chipset work-around (gsi > 15), but
973 * avoid a problem where the 8254 timer (IRQ0) is setup
974 * via an override (so it's not on pin 0 of the ioapic),
975 * and at the same time, the pin 0 interrupt is a PCI
976 * type. The gsi > 15 test could cause these two pins
977 * to be shared as IRQ0, and they are not shareable.
978 * So test for this condition, and if necessary, avoid
979 * the pin collision.
980 */
981 if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
972 gsi = pci_irq++; 982 gsi = pci_irq++;
973 /* 983 /*
974 * Don't assign IRQ used by ACPI SCI 984 * Don't assign IRQ used by ACPI SCI
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index af035ede70cd..a9275c9557cf 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -54,6 +54,10 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
54 else 54 else
55#endif 55#endif
56 node = numa_node_id(); 56 node = numa_node_id();
57
58 if (node < first_node(node_online_map))
59 node = first_node(node_online_map);
60
57 page = alloc_pages_node(node, gfp, order); 61 page = alloc_pages_node(node, gfp, order);
58 return page ? page_address(page) : NULL; 62 return page ? page_address(page) : NULL;
59} 63}
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index a6c01e121266..82a7c9bfdfa0 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -112,10 +112,6 @@ static unsigned long alloc_iommu(int size)
112static void free_iommu(unsigned long offset, int size) 112static void free_iommu(unsigned long offset, int size)
113{ 113{
114 unsigned long flags; 114 unsigned long flags;
115 if (size == 1) {
116 clear_bit(offset, iommu_gart_bitmap);
117 return;
118 }
119 spin_lock_irqsave(&iommu_bitmap_lock, flags); 115 spin_lock_irqsave(&iommu_bitmap_lock, flags);
120 __clear_bit_string(iommu_gart_bitmap, offset, size); 116 __clear_bit_string(iommu_gart_bitmap, offset, size);
121 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 117 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -635,14 +631,20 @@ static int __init pci_iommu_init(void)
635 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); 631 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
636 if (end_pfn > MAX_DMA32_PFN) { 632 if (end_pfn > MAX_DMA32_PFN) {
637 printk(KERN_ERR "WARNING more than 4GB of memory " 633 printk(KERN_ERR "WARNING more than 4GB of memory "
638 "but IOMMU not compiled in.\n" 634 "but IOMMU not available.\n"
639 KERN_ERR "WARNING 32bit PCI may malfunction.\n" 635 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
640 KERN_ERR "You might want to enable "
641 "CONFIG_GART_IOMMU\n");
642 } 636 }
643 return -1; 637 return -1;
644 } 638 }
645 639
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 }
647
646 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
647 aper_size = info.aper_size * 1024 * 1024; 649 aper_size = info.aper_size * 1024 * 1024;
648 iommu_size = check_iommu_size(info.aper_base, aper_size); 650 iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 44adcc2d5e5b..1f6ecc62061d 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -12,9 +12,10 @@ static int
12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
13{ 13{
14 if (hwdev && bus + size > *hwdev->dma_mask) { 14 if (hwdev && bus + size > *hwdev->dma_mask) {
15 printk(KERN_ERR 15 if (*hwdev->dma_mask >= 0xffffffffULL)
16 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", 16 printk(KERN_ERR
17 name, (long long)bus, size, (long long)*hwdev->dma_mask); 17 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
18 name, (long long)bus, size, (long long)*hwdev->dma_mask);
18 return 0; 19 return 0;
19 } 20 }
20 return 1; 21 return 1;
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
index b0444a415bd6..bf421ed26808 100644
--- a/arch/x86_64/kernel/pmtimer.c
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -68,7 +68,7 @@ int pmtimer_mark_offset(void)
68 offset_delay = delta % (USEC_PER_SEC / HZ); 68 offset_delay = delta % (USEC_PER_SEC / HZ);
69 69
70 rdtscll(tsc); 70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz; 71 vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000;
72 72
73 /* don't calculate delay for first run, 73 /* don't calculate delay for first run,
74 or if we've got less then a tick */ 74 or if we've got less then a tick */
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 1c44b53cb15b..fb903e65e079 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -575,8 +575,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
575 prev->userrsp = read_pda(oldrsp); 575 prev->userrsp = read_pda(oldrsp);
576 write_pda(oldrsp, next->userrsp); 576 write_pda(oldrsp, next->userrsp);
577 write_pda(pcurrent, next_p); 577 write_pda(pcurrent, next_p);
578
578 /* This must be here to ensure both math_state_restore() and 579 /* This must be here to ensure both math_state_restore() and
579 kernel_fpu_begin() work consistently. */ 580 kernel_fpu_begin() work consistently.
581 And the AMD workaround requires it to be after DS reload. */
580 unlazy_fpu(prev_p); 582 unlazy_fpu(prev_p);
581 write_pda(kernelstack, 583 write_pda(kernelstack,
582 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); 584 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index da8e7903d817..2d50024c9f30 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -600,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
600 600
601 if (unlikely(current->audit_context)) { 601 if (unlikely(current->audit_context)) {
602 if (test_thread_flag(TIF_IA32)) { 602 if (test_thread_flag(TIF_IA32)) {
603 audit_syscall_entry(current, AUDIT_ARCH_I386, 603 audit_syscall_entry(AUDIT_ARCH_I386,
604 regs->orig_rax, 604 regs->orig_rax,
605 regs->rbx, regs->rcx, 605 regs->rbx, regs->rcx,
606 regs->rdx, regs->rsi); 606 regs->rdx, regs->rsi);
607 } else { 607 } else {
608 audit_syscall_entry(current, AUDIT_ARCH_X86_64, 608 audit_syscall_entry(AUDIT_ARCH_X86_64,
609 regs->orig_rax, 609 regs->orig_rax,
610 regs->rdi, regs->rsi, 610 regs->rdi, regs->rsi,
611 regs->rdx, regs->r10); 611 regs->rdx, regs->r10);
@@ -616,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
616asmlinkage void syscall_trace_leave(struct pt_regs *regs) 616asmlinkage void syscall_trace_leave(struct pt_regs *regs)
617{ 617{
618 if (unlikely(current->audit_context)) 618 if (unlikely(current->audit_context))
619 audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); 619 audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
620 620
621 if ((test_thread_flag(TIF_SYSCALL_TRACE) 621 if ((test_thread_flag(TIF_SYSCALL_TRACE)
622 || test_thread_flag(TIF_SINGLESTEP)) 622 || test_thread_flag(TIF_SINGLESTEP))
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index c50b06765a80..655b9192eeb3 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -571,17 +571,28 @@ static inline void copy_edd(void)
571#endif 571#endif
572 572
573#define EBDA_ADDR_POINTER 0x40E 573#define EBDA_ADDR_POINTER 0x40E
574static void __init reserve_ebda_region(void) 574
575unsigned __initdata ebda_addr;
576unsigned __initdata ebda_size;
577
578static void discover_ebda(void)
575{ 579{
576 unsigned int addr; 580 /*
577 /**
578 * there is a real-mode segmented pointer pointing to the 581 * there is a real-mode segmented pointer pointing to the
579 * 4K EBDA area at 0x40E 582 * 4K EBDA area at 0x40E
580 */ 583 */
581 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); 584 ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
582 addr <<= 4; 585 ebda_addr <<= 4;
583 if (addr) 586
584 reserve_bootmem_generic(addr, PAGE_SIZE); 587 ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
588
589 /* Round EBDA up to pages */
590 if (ebda_size == 0)
591 ebda_size = 1;
592 ebda_size <<= 10;
593 ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
594 if (ebda_size > 64*1024)
595 ebda_size = 64*1024;
585} 596}
586 597
587void __init setup_arch(char **cmdline_p) 598void __init setup_arch(char **cmdline_p)
@@ -627,6 +638,8 @@ void __init setup_arch(char **cmdline_p)
627 638
628 check_efer(); 639 check_efer();
629 640
641 discover_ebda();
642
630 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 643 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
631 644
632 dmi_scan_machine(); 645 dmi_scan_machine();
@@ -669,7 +682,8 @@ void __init setup_arch(char **cmdline_p)
669 reserve_bootmem_generic(0, PAGE_SIZE); 682 reserve_bootmem_generic(0, PAGE_SIZE);
670 683
671 /* reserve ebda region */ 684 /* reserve ebda region */
672 reserve_ebda_region(); 685 if (ebda_addr)
686 reserve_bootmem_generic(ebda_addr, ebda_size);
673 687
674#ifdef CONFIG_SMP 688#ifdef CONFIG_SMP
675 /* 689 /*
@@ -930,6 +944,10 @@ static int __init init_amd(struct cpuinfo_x86 *c)
930 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) 944 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
931 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); 945 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
932 946
947 /* Enable workaround for FXSAVE leak */
948 if (c->x86 >= 6)
949 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
950
933 r = get_model_name(c); 951 r = get_model_name(c);
934 if (!r) { 952 if (!r) {
935 switch (c->x86) { 953 switch (c->x86) {
@@ -1033,7 +1051,7 @@ static void srat_detect_node(void)
1033 for now. */ 1051 for now. */
1034 node = apicid_to_node[hard_smp_processor_id()]; 1052 node = apicid_to_node[hard_smp_processor_id()];
1035 if (node == NUMA_NO_NODE) 1053 if (node == NUMA_NO_NODE)
1036 node = 0; 1054 node = first_node(node_online_map);
1037 numa_set_node(cpu, node); 1055 numa_set_node(cpu, node);
1038 1056
1039 if (acpi_numa > 0) 1057 if (acpi_numa > 0)
@@ -1422,3 +1440,22 @@ struct seq_operations cpuinfo_op = {
1422 .show = show_cpuinfo, 1440 .show = show_cpuinfo,
1423}; 1441};
1424 1442
1443#ifdef CONFIG_INPUT_PCSPKR
1444#include <linux/platform_device.h>
1445static __init int add_pcspkr(void)
1446{
1447 struct platform_device *pd;
1448 int ret;
1449
1450 pd = platform_device_alloc("pcspkr", -1);
1451 if (!pd)
1452 return -ENOMEM;
1453
1454 ret = platform_device_add(pd);
1455 if (ret)
1456 platform_device_put(pd);
1457
1458 return ret;
1459}
1460device_initcall(add_pcspkr);
1461#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 6bda322d3caf..cea335e8746c 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,6 +30,7 @@
30#include <linux/moduleparam.h> 30#include <linux/moduleparam.h>
31#include <linux/nmi.h> 31#include <linux/nmi.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/kexec.h>
33 34
34#include <asm/system.h> 35#include <asm/system.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
@@ -101,6 +102,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
101{ 102{
102 if (regs->eflags & X86_EFLAGS_IF) 103 if (regs->eflags & X86_EFLAGS_IF)
103 local_irq_disable(); 104 local_irq_disable();
105 /* Make sure to not schedule here because we could be running
106 on an exception stack. */
104 preempt_enable_no_resched(); 107 preempt_enable_no_resched();
105} 108}
106 109
@@ -384,6 +387,7 @@ void out_of_line_bug(void)
384 387
385static DEFINE_SPINLOCK(die_lock); 388static DEFINE_SPINLOCK(die_lock);
386static int die_owner = -1; 389static int die_owner = -1;
390static unsigned int die_nest_count;
387 391
388unsigned __kprobes long oops_begin(void) 392unsigned __kprobes long oops_begin(void)
389{ 393{
@@ -398,6 +402,7 @@ unsigned __kprobes long oops_begin(void)
398 else 402 else
399 spin_lock(&die_lock); 403 spin_lock(&die_lock);
400 } 404 }
405 die_nest_count++;
401 die_owner = cpu; 406 die_owner = cpu;
402 console_verbose(); 407 console_verbose();
403 bust_spinlocks(1); 408 bust_spinlocks(1);
@@ -408,7 +413,13 @@ void __kprobes oops_end(unsigned long flags)
408{ 413{
409 die_owner = -1; 414 die_owner = -1;
410 bust_spinlocks(0); 415 bust_spinlocks(0);
411 spin_unlock_irqrestore(&die_lock, flags); 416 die_nest_count--;
417 if (die_nest_count)
418 /* We still own the lock */
419 local_irq_restore(flags);
420 else
421 /* Nest count reaches zero, release the lock. */
422 spin_unlock_irqrestore(&die_lock, flags);
412 if (panic_on_oops) 423 if (panic_on_oops)
413 panic("Oops"); 424 panic("Oops");
414} 425}
@@ -433,6 +444,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
433 printk(KERN_ALERT "RIP "); 444 printk(KERN_ALERT "RIP ");
434 printk_address(regs->rip); 445 printk_address(regs->rip);
435 printk(" RSP <%016lx>\n", regs->rsp); 446 printk(" RSP <%016lx>\n", regs->rsp);
447 if (kexec_should_crash(current))
448 crash_kexec(regs);
436} 449}
437 450
438void die(const char * str, struct pt_regs * regs, long err) 451void die(const char * str, struct pt_regs * regs, long err)
@@ -455,10 +468,14 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
455 */ 468 */
456 printk(str, safe_smp_processor_id()); 469 printk(str, safe_smp_processor_id());
457 show_registers(regs); 470 show_registers(regs);
471 if (kexec_should_crash(current))
472 crash_kexec(regs);
458 if (panic_on_timeout || panic_on_oops) 473 if (panic_on_timeout || panic_on_oops)
459 panic("nmi watchdog"); 474 panic("nmi watchdog");
460 printk("console shuts up ...\n"); 475 printk("console shuts up ...\n");
461 oops_end(flags); 476 oops_end(flags);
477 nmi_exit();
478 local_irq_enable();
462 do_exit(SIGSEGV); 479 do_exit(SIGSEGV);
463} 480}
464 481
@@ -468,8 +485,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
468{ 485{
469 struct task_struct *tsk = current; 486 struct task_struct *tsk = current;
470 487
471 conditional_sti(regs);
472
473 tsk->thread.error_code = error_code; 488 tsk->thread.error_code = error_code;
474 tsk->thread.trap_no = trapnr; 489 tsk->thread.trap_no = trapnr;
475 490
@@ -506,6 +521,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
506 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 521 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
507 == NOTIFY_STOP) \ 522 == NOTIFY_STOP) \
508 return; \ 523 return; \
524 conditional_sti(regs); \
509 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 525 do_trap(trapnr, signr, str, regs, error_code, NULL); \
510} 526}
511 527
@@ -520,6 +536,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
520 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 536 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
521 == NOTIFY_STOP) \ 537 == NOTIFY_STOP) \
522 return; \ 538 return; \
539 conditional_sti(regs); \
523 do_trap(trapnr, signr, str, regs, error_code, &info); \ 540 do_trap(trapnr, signr, str, regs, error_code, &info); \
524} 541}
525 542
@@ -533,7 +550,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
533DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 550DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
534DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 551DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
535DO_ERROR(18, SIGSEGV, "reserved", reserved) 552DO_ERROR(18, SIGSEGV, "reserved", reserved)
536DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 553
554/* Runs on IST stack */
555asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
556{
557 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
558 12, SIGBUS) == NOTIFY_STOP)
559 return;
560 preempt_conditional_sti(regs);
561 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
562 preempt_conditional_cli(regs);
563}
537 564
538asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) 565asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
539{ 566{
@@ -667,8 +694,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
667 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { 694 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
668 return; 695 return;
669 } 696 }
697 preempt_conditional_sti(regs);
670 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 698 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
671 return; 699 preempt_conditional_cli(regs);
672} 700}
673 701
674/* Help handler running on IST stack to switch back to user stack 702/* Help handler running on IST stack to switch back to user stack
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index cc02573a3271..b2fac14baac0 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -188,11 +188,13 @@ void __init setup_node_zones(int nodeid)
188 memory. */ 188 memory. */
189 memmapsize = sizeof(struct page) * (end_pfn-start_pfn); 189 memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
190 limit = end_pfn << PAGE_SHIFT; 190 limit = end_pfn << PAGE_SHIFT;
191#ifdef CONFIG_FLAT_NODE_MEM_MAP
191 NODE_DATA(nodeid)->node_mem_map = 192 NODE_DATA(nodeid)->node_mem_map =
192 __alloc_bootmem_core(NODE_DATA(nodeid)->bdata, 193 __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
193 memmapsize, SMP_CACHE_BYTES, 194 memmapsize, SMP_CACHE_BYTES,
194 round_down(limit - memmapsize, PAGE_SIZE), 195 round_down(limit - memmapsize, PAGE_SIZE),
195 limit); 196 limit);
197#endif
196 198
197 size_zones(zones, holes, start_pfn, end_pfn); 199 size_zones(zones, holes, start_pfn, end_pfn);
198 free_area_init_node(nodeid, NODE_DATA(nodeid), zones, 200 free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 15ae9fcd65a7..474df22c6ed2 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -34,7 +34,10 @@ static nodemask_t nodes_found __initdata;
34static struct bootnode nodes[MAX_NUMNODES] __initdata; 34static struct bootnode nodes[MAX_NUMNODES] __initdata;
35static struct bootnode nodes_add[MAX_NUMNODES] __initdata; 35static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
36static int found_add_area __initdata; 36static int found_add_area __initdata;
37int hotadd_percent __initdata = 10; 37int hotadd_percent __initdata = 0;
38#ifndef RESERVE_HOTADD
39#define hotadd_percent 0 /* Ignore all settings */
40#endif
38static u8 pxm2node[256] = { [0 ... 255] = 0xff }; 41static u8 pxm2node[256] = { [0 ... 255] = 0xff };
39 42
40/* Too small nodes confuse the VM badly. Usually they result 43/* Too small nodes confuse the VM badly. Usually they result
@@ -103,6 +106,7 @@ static __init void bad_srat(void)
103 int i; 106 int i;
104 printk(KERN_ERR "SRAT: SRAT not used.\n"); 107 printk(KERN_ERR "SRAT: SRAT not used.\n");
105 acpi_numa = -1; 108 acpi_numa = -1;
109 found_add_area = 0;
106 for (i = 0; i < MAX_LOCAL_APIC; i++) 110 for (i = 0; i < MAX_LOCAL_APIC; i++)
107 apicid_to_node[i] = NUMA_NO_NODE; 111 apicid_to_node[i] = NUMA_NO_NODE;
108 for (i = 0; i < MAX_NUMNODES; i++) 112 for (i = 0; i < MAX_NUMNODES; i++)
@@ -154,7 +158,8 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
154 int pxm, node; 158 int pxm, node;
155 if (srat_disabled()) 159 if (srat_disabled())
156 return; 160 return;
157 if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat(); 161 if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) {
162 bad_srat();
158 return; 163 return;
159 } 164 }
160 if (pa->flags.enabled == 0) 165 if (pa->flags.enabled == 0)
@@ -191,15 +196,17 @@ static int hotadd_enough_memory(struct bootnode *nd)
191 allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE; 196 allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
192 allowed = (allowed / 100) * hotadd_percent; 197 allowed = (allowed / 100) * hotadd_percent;
193 if (allocated + mem > allowed) { 198 if (allocated + mem > allowed) {
199 unsigned long range;
194 /* Give them at least part of their hotadd memory upto hotadd_percent 200 /* Give them at least part of their hotadd memory upto hotadd_percent
195 It would be better to spread the limit out 201 It would be better to spread the limit out
196 over multiple hotplug areas, but that is too complicated 202 over multiple hotplug areas, but that is too complicated
197 right now */ 203 right now */
198 if (allocated >= allowed) 204 if (allocated >= allowed)
199 return 0; 205 return 0;
200 pages = (allowed - allocated + mem) / sizeof(struct page); 206 range = allowed - allocated;
207 pages = (range / PAGE_SIZE);
201 mem = pages * sizeof(struct page); 208 mem = pages * sizeof(struct page);
202 nd->end = nd->start + pages*PAGE_SIZE; 209 nd->end = nd->start + range;
203 } 210 }
204 /* Not completely fool proof, but a good sanity check */ 211 /* Not completely fool proof, but a good sanity check */
205 addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem); 212 addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
@@ -392,8 +399,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
392 /* First clean up the node list */ 399 /* First clean up the node list */
393 for (i = 0; i < MAX_NUMNODES; i++) { 400 for (i = 0; i < MAX_NUMNODES; i++) {
394 cutoff_node(i, start, end); 401 cutoff_node(i, start, end);
395 if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) 402 if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
396 unparse_node(i); 403 unparse_node(i);
404 node_set_offline(i);
405 }
397 } 406 }
398 407
399 if (acpi_numa <= 0) 408 if (acpi_numa <= 0)