aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
author Steve French <sfrench@us.ibm.com> 2006-01-17 22:49:59 -0500
committer Steve French <sfrench@us.ibm.com> 2006-01-17 22:49:59 -0500
commit d65177c1ae7f085723154105c5dc8d9e16ae8265 (patch)
tree 14408129d880d89cc5e937f2810f243ed1e6fcde /arch/x86_64
parent d41f084a74de860fe879403fbbad13abdf7aea8e (diff)
parent 15578eeb6cd4b74492f26e60624aa1a9a52ddd7b (diff)
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- arch/x86_64/Kconfig 9
-rw-r--r-- arch/x86_64/defconfig 21
-rw-r--r-- arch/x86_64/ia32/Makefile 3
-rw-r--r-- arch/x86_64/ia32/ia32_binfmt.c 17
-rw-r--r-- arch/x86_64/ia32/mmap32.c 78
-rw-r--r-- arch/x86_64/kernel/apic.c 5
-rw-r--r-- arch/x86_64/kernel/asm-offsets.c 5
-rw-r--r-- arch/x86_64/kernel/entry.S 12
-rw-r--r-- arch/x86_64/kernel/head.S 108
-rw-r--r-- arch/x86_64/kernel/setup64.c 2
-rw-r--r-- arch/x86_64/mm/Makefile 2
-rw-r--r-- arch/x86_64/mm/init.c 180
-rw-r--r-- arch/x86_64/mm/mmap.c 30
13 files changed, 315 insertions, 157 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 2efc4be22709..2f9deca31cc9 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -305,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT
305 305
306config ARCH_SPARSEMEM_ENABLE 306config ARCH_SPARSEMEM_ENABLE
307 def_bool y 307 def_bool y
308 depends on NUMA 308 depends on (NUMA || EXPERIMENTAL)
309
310config ARCH_MEMORY_PROBE
311 def_bool y
312 depends on MEMORY_HOTPLUG
309 313
310config ARCH_FLATMEM_ENABLE 314config ARCH_FLATMEM_ENABLE
311 def_bool y 315 def_bool y
@@ -315,6 +319,7 @@ source "mm/Kconfig"
315 319
316config HAVE_ARCH_EARLY_PFN_TO_NID 320config HAVE_ARCH_EARLY_PFN_TO_NID
317 def_bool y 321 def_bool y
322 depends on NUMA
318 323
319config NR_CPUS 324config NR_CPUS
320 int "Maximum number of CPUs (2-256)" 325 int "Maximum number of CPUs (2-256)"
@@ -350,7 +355,7 @@ config HPET_TIMER
350 <http://www.intel.com/hardwaredesign/hpetspec.htm>. 355 <http://www.intel.com/hardwaredesign/hpetspec.htm>.
351 356
352config X86_PM_TIMER 357config X86_PM_TIMER
353 bool "PM timer" 358 bool "PM timer" if EMBEDDED
354 depends on ACPI 359 depends on ACPI
355 default y 360 default y
356 help 361 help
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 054dcd8a5e9d..5231fe83ea4b 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.15-git7 3# Linux kernel version: 2.6.15-git12
4# Wed Jan 11 11:57:36 2006 4# Mon Jan 16 13:09:08 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -319,6 +319,11 @@ CONFIG_IPV6=y
319# CONFIG_ATALK is not set 319# CONFIG_ATALK is not set
320# CONFIG_X25 is not set 320# CONFIG_X25 is not set
321# CONFIG_LAPB is not set 321# CONFIG_LAPB is not set
322
323#
324# TIPC Configuration (EXPERIMENTAL)
325#
326# CONFIG_TIPC is not set
322# CONFIG_NET_DIVERT is not set 327# CONFIG_NET_DIVERT is not set
323# CONFIG_ECONET is not set 328# CONFIG_ECONET is not set
324# CONFIG_WAN_ROUTER is not set 329# CONFIG_WAN_ROUTER is not set
@@ -537,8 +542,7 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
537# CONFIG_SCSI_IPR is not set 542# CONFIG_SCSI_IPR is not set
538# CONFIG_SCSI_QLOGIC_FC is not set 543# CONFIG_SCSI_QLOGIC_FC is not set
539# CONFIG_SCSI_QLOGIC_1280 is not set 544# CONFIG_SCSI_QLOGIC_1280 is not set
540CONFIG_SCSI_QLA2XXX=y 545# CONFIG_SCSI_QLA_FC is not set
541# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
542# CONFIG_SCSI_LPFC is not set 546# CONFIG_SCSI_LPFC is not set
543# CONFIG_SCSI_DC395x is not set 547# CONFIG_SCSI_DC395x is not set
544# CONFIG_SCSI_DC390T is not set 548# CONFIG_SCSI_DC390T is not set
@@ -805,6 +809,7 @@ CONFIG_SOFT_WATCHDOG=y
805# CONFIG_W83877F_WDT is not set 809# CONFIG_W83877F_WDT is not set
806# CONFIG_W83977F_WDT is not set 810# CONFIG_W83977F_WDT is not set
807# CONFIG_MACHZ_WDT is not set 811# CONFIG_MACHZ_WDT is not set
812# CONFIG_SBC_EPX_C3_WATCHDOG is not set
808 813
809# 814#
810# PCI-based Watchdog Cards 815# PCI-based Watchdog Cards
@@ -850,6 +855,12 @@ CONFIG_HPET_MMAP=y
850# CONFIG_I2C is not set 855# CONFIG_I2C is not set
851 856
852# 857#
858# SPI support
859#
860# CONFIG_SPI is not set
861# CONFIG_SPI_MASTER is not set
862
863#
853# Dallas's 1-wire bus 864# Dallas's 1-wire bus
854# 865#
855# CONFIG_W1 is not set 866# CONFIG_W1 is not set
@@ -992,6 +1003,7 @@ CONFIG_USB_STORAGE=y
992# 1003#
993CONFIG_USB_HID=y 1004CONFIG_USB_HID=y
994CONFIG_USB_HIDINPUT=y 1005CONFIG_USB_HIDINPUT=y
1006# CONFIG_USB_HIDINPUT_POWERBOOK is not set
995# CONFIG_HID_FF is not set 1007# CONFIG_HID_FF is not set
996# CONFIG_USB_HIDDEV is not set 1008# CONFIG_USB_HIDDEV is not set
997# CONFIG_USB_AIPTEK is not set 1009# CONFIG_USB_AIPTEK is not set
@@ -1276,6 +1288,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
1276CONFIG_DEBUG_FS=y 1288CONFIG_DEBUG_FS=y
1277# CONFIG_DEBUG_VM is not set 1289# CONFIG_DEBUG_VM is not set
1278# CONFIG_FRAME_POINTER is not set 1290# CONFIG_FRAME_POINTER is not set
1291# CONFIG_FORCED_INLINING is not set
1279# CONFIG_RCU_TORTURE_TEST is not set 1292# CONFIG_RCU_TORTURE_TEST is not set
1280CONFIG_INIT_DEBUG=y 1293CONFIG_INIT_DEBUG=y
1281# CONFIG_DEBUG_RODATA is not set 1294# CONFIG_DEBUG_RODATA is not set
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
index 051608d55920..929e6b0771f8 100644
--- a/arch/x86_64/ia32/Makefile
+++ b/arch/x86_64/ia32/Makefile
@@ -3,7 +3,8 @@
3# 3#
4 4
5obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ 5obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
6 ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o 6 ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \
7 mmap32.o
7 8
8sysv-$(CONFIG_SYSVIPC) := ipc32.o 9sysv-$(CONFIG_SYSVIPC) := ipc32.o
9obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) 10obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 029bddab0459..572b3b28772d 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -293,8 +293,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int
293} while(0) 293} while(0)
294 294
295 295
296#define elf_map elf32_map
297
298#include <linux/module.h> 296#include <linux/module.h>
299 297
300MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); 298MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
@@ -390,21 +388,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
390} 388}
391EXPORT_SYMBOL(ia32_setup_arg_pages); 389EXPORT_SYMBOL(ia32_setup_arg_pages);
392 390
393static unsigned long
394elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
395{
396 unsigned long map_addr;
397 struct task_struct *me = current;
398
399 down_write(&me->mm->mmap_sem);
400 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
401 eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot,
402 type,
403 eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
404 up_write(&me->mm->mmap_sem);
405 return(map_addr);
406}
407
408#ifdef CONFIG_SYSCTL 391#ifdef CONFIG_SYSCTL
409/* Register vsyscall32 into the ABI table */ 392/* Register vsyscall32 into the ABI table */
410#include <linux/sysctl.h> 393#include <linux/sysctl.h>
diff --git a/arch/x86_64/ia32/mmap32.c b/arch/x86_64/ia32/mmap32.c
new file mode 100644
index 000000000000..079f4132575c
--- /dev/null
+++ b/arch/x86_64/ia32/mmap32.c
@@ -0,0 +1,78 @@
1/*
2 * linux/arch/x86_64/ia32/mmap32.c
3 *
4 * flexible mmap layout support
5 *
6 * Based on the i386 version which was
7 *
8 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
9 * All Rights Reserved.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 *
26 * Started by Ingo Molnar <mingo@elte.hu>
27 */
28
29#include <linux/personality.h>
30#include <linux/mm.h>
31#include <linux/random.h>
32
33/*
34 * Top of mmap area (just below the process stack).
35 *
36 * Leave a hole of at least ~128 MB.
37 */
38#define MIN_GAP (128*1024*1024)
39#define MAX_GAP (TASK_SIZE/6*5)
40
41static inline unsigned long mmap_base(struct mm_struct *mm)
42{
43 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
44 unsigned long random_factor = 0;
45
46 if (current->flags & PF_RANDOMIZE)
47 random_factor = get_random_int() % (1024*1024);
48
49 if (gap < MIN_GAP)
50 gap = MIN_GAP;
51 else if (gap > MAX_GAP)
52 gap = MAX_GAP;
53
54 return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
55}
56
57/*
58 * This function, called very early during the creation of a new
59 * process VM image, sets up which VM layout function to use:
60 */
61void ia32_pick_mmap_layout(struct mm_struct *mm)
62{
63 /*
64 * Fall back to the standard layout if the personality
65 * bit is set, or if the expected stack growth is unlimited:
66 */
67 if (sysctl_legacy_va_layout ||
68 (current->personality & ADDR_COMPAT_LAYOUT) ||
69 current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
70 mm->mmap_base = TASK_UNMAPPED_BASE;
71 mm->get_unmapped_area = arch_get_unmapped_area;
72 mm->unmap_area = arch_unmap_area;
73 } else {
74 mm->mmap_base = mmap_base(mm);
75 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
76 mm->unmap_area = arch_unmap_area_topdown;
77 }
78}
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 8fdd089fd17e..5d3c5b07b8db 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -499,13 +499,10 @@ static int lapic_resume(struct sys_device *dev)
499 if (!apic_pm_state.active) 499 if (!apic_pm_state.active)
500 return 0; 500 return 0;
501 501
502 /* XXX: Pavel needs this for S3 resume, but can't explain why */
503 set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
504
505 local_irq_save(flags); 502 local_irq_save(flags);
506 rdmsr(MSR_IA32_APICBASE, l, h); 503 rdmsr(MSR_IA32_APICBASE, l, h);
507 l &= ~MSR_IA32_APICBASE_BASE; 504 l &= ~MSR_IA32_APICBASE_BASE;
508 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; 505 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
509 wrmsr(MSR_IA32_APICBASE, l, h); 506 wrmsr(MSR_IA32_APICBASE, l, h);
510 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 507 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
511 apic_write(APIC_ID, apic_pm_state.apic_id); 508 apic_write(APIC_ID, apic_pm_state.apic_id);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index cfb4f9cebea4..38834bbbae11 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
43 ENTRY(irqcount); 43 ENTRY(irqcount);
44 ENTRY(cpunumber); 44 ENTRY(cpunumber);
45 ENTRY(irqstackptr); 45 ENTRY(irqstackptr);
46 ENTRY(data_offset);
46 BLANK(); 47 BLANK();
47#undef ENTRY 48#undef ENTRY
48#ifdef CONFIG_IA32_EMULATION 49#ifdef CONFIG_IA32_EMULATION
@@ -66,8 +67,6 @@ int main(void)
66 DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); 67 DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
67 DEFINE(pbe_next, offsetof(struct pbe, next)); 68 DEFINE(pbe_next, offsetof(struct pbe, next));
68 BLANK(); 69 BLANK();
69#if DEBUG_STKSZ > EXCEPTION_STKSZ 70 DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
70 DEFINE(DEBUG_IST, DEBUG_STACK);
71#endif
72 return 0; 71 return 0;
73} 72}
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 632fc0f59fcc..dbdba56e8faa 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,6 +41,7 @@
41#include <asm/unistd.h> 41#include <asm/unistd.h>
42#include <asm/thread_info.h> 42#include <asm/thread_info.h>
43#include <asm/hw_irq.h> 43#include <asm/hw_irq.h>
44#include <asm/page.h>
44 45
45 .code64 46 .code64
46 47
@@ -674,9 +675,6 @@ ENTRY(spurious_interrupt)
674 675
675 /* error code is on the stack already */ 676 /* error code is on the stack already */
676 /* handle NMI like exceptions that can happen everywhere */ 677 /* handle NMI like exceptions that can happen everywhere */
677#ifndef DEBUG_IST
678# define DEBUG_IST 0
679#endif
680 .macro paranoidentry sym, ist=0 678 .macro paranoidentry sym, ist=0
681 SAVE_ALL 679 SAVE_ALL
682 cld 680 cld
@@ -695,11 +693,11 @@ ENTRY(spurious_interrupt)
695 movq ORIG_RAX(%rsp),%rsi 693 movq ORIG_RAX(%rsp),%rsi
696 movq $-1,ORIG_RAX(%rsp) 694 movq $-1,ORIG_RAX(%rsp)
697 .if \ist 695 .if \ist
698 subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 696 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
699 .endif 697 .endif
700 call \sym 698 call \sym
701 .if \ist 699 .if \ist
702 addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 700 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
703 .endif 701 .endif
704 cli 702 cli
705 .endm 703 .endm
@@ -918,7 +916,7 @@ KPROBE_ENTRY(debug)
918 INTR_FRAME 916 INTR_FRAME
919 pushq $0 917 pushq $0
920 CFI_ADJUST_CFA_OFFSET 8 918 CFI_ADJUST_CFA_OFFSET 8
921 paranoidentry do_debug, DEBUG_IST 919 paranoidentry do_debug, DEBUG_STACK
922 jmp paranoid_exit 920 jmp paranoid_exit
923 CFI_ENDPROC 921 CFI_ENDPROC
924 .previous .text 922 .previous .text
@@ -976,7 +974,7 @@ KPROBE_ENTRY(int3)
976 INTR_FRAME 974 INTR_FRAME
977 pushq $0 975 pushq $0
978 CFI_ADJUST_CFA_OFFSET 8 976 CFI_ADJUST_CFA_OFFSET 8
979 paranoidentry do_int3, DEBUG_IST 977 paranoidentry do_int3, DEBUG_STACK
980 jmp paranoid_exit 978 jmp paranoid_exit
981 CFI_ENDPROC 979 CFI_ENDPROC
982 .previous .text 980 .previous .text
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 38fc3d5112e7..692c737feddb 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -241,104 +241,70 @@ ljumpvector:
241ENTRY(stext) 241ENTRY(stext)
242ENTRY(_stext) 242ENTRY(_stext)
243 243
244.org 0x1000 244 $page = 0
245ENTRY(init_level4_pgt) 245#define NEXT_PAGE(name) \
246 $page = $page + 1; \
247 .org $page * 0x1000; \
248 phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
249ENTRY(name)
250
251NEXT_PAGE(init_level4_pgt)
246 /* This gets initialized in x86_64_start_kernel */ 252 /* This gets initialized in x86_64_start_kernel */
247 .fill 512,8,0 253 .fill 512,8,0
248 254
249.org 0x2000 255NEXT_PAGE(level3_ident_pgt)
250ENTRY(level3_ident_pgt) 256 .quad phys_level2_ident_pgt | 0x007
251 .quad 0x0000000000004007 + __PHYSICAL_START
252 .fill 511,8,0 257 .fill 511,8,0
253 258
254.org 0x3000 259NEXT_PAGE(level3_kernel_pgt)
255ENTRY(level3_kernel_pgt)
256 .fill 510,8,0 260 .fill 510,8,0
257 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 261 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
258 .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt */ 262 .quad phys_level2_kernel_pgt | 0x007
259 .fill 1,8,0 263 .fill 1,8,0
260 264
261.org 0x4000 265NEXT_PAGE(level2_ident_pgt)
262ENTRY(level2_ident_pgt)
263 /* 40MB for bootup. */ 266 /* 40MB for bootup. */
264 .quad 0x0000000000000083 267 i = 0
265 .quad 0x0000000000200083 268 .rept 20
266 .quad 0x0000000000400083 269 .quad i << 21 | 0x083
267 .quad 0x0000000000600083 270 i = i + 1
268 .quad 0x0000000000800083 271 .endr
269 .quad 0x0000000000A00083
270 .quad 0x0000000000C00083
271 .quad 0x0000000000E00083
272 .quad 0x0000000001000083
273 .quad 0x0000000001200083
274 .quad 0x0000000001400083
275 .quad 0x0000000001600083
276 .quad 0x0000000001800083
277 .quad 0x0000000001A00083
278 .quad 0x0000000001C00083
279 .quad 0x0000000001E00083
280 .quad 0x0000000002000083
281 .quad 0x0000000002200083
282 .quad 0x0000000002400083
283 .quad 0x0000000002600083
284 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ 272 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
285 .globl temp_boot_pmds 273 .globl temp_boot_pmds
286temp_boot_pmds: 274temp_boot_pmds:
287 .fill 492,8,0 275 .fill 492,8,0
288 276
289.org 0x5000 277NEXT_PAGE(level2_kernel_pgt)
290ENTRY(level2_kernel_pgt)
291 /* 40MB kernel mapping. The kernel code cannot be bigger than that. 278 /* 40MB kernel mapping. The kernel code cannot be bigger than that.
292 When you change this change KERNEL_TEXT_SIZE in page.h too. */ 279 When you change this change KERNEL_TEXT_SIZE in page.h too. */
293 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ 280 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
294 .quad 0x0000000000000183 281 i = 0
295 .quad 0x0000000000200183 282 .rept 20
296 .quad 0x0000000000400183 283 .quad i << 21 | 0x183
297 .quad 0x0000000000600183 284 i = i + 1
298 .quad 0x0000000000800183 285 .endr
299 .quad 0x0000000000A00183
300 .quad 0x0000000000C00183
301 .quad 0x0000000000E00183
302 .quad 0x0000000001000183
303 .quad 0x0000000001200183
304 .quad 0x0000000001400183
305 .quad 0x0000000001600183
306 .quad 0x0000000001800183
307 .quad 0x0000000001A00183
308 .quad 0x0000000001C00183
309 .quad 0x0000000001E00183
310 .quad 0x0000000002000183
311 .quad 0x0000000002200183
312 .quad 0x0000000002400183
313 .quad 0x0000000002600183
314 /* Module mapping starts here */ 286 /* Module mapping starts here */
315 .fill 492,8,0 287 .fill 492,8,0
316 288
317.org 0x6000 289NEXT_PAGE(empty_zero_page)
318ENTRY(empty_zero_page)
319
320.org 0x7000
321ENTRY(empty_bad_page)
322 290
323.org 0x8000 291NEXT_PAGE(level3_physmem_pgt)
324ENTRY(empty_bad_pte_table) 292 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
293 .fill 511,8,0
325 294
326.org 0x9000 295#undef NEXT_PAGE
327ENTRY(empty_bad_pmd_table)
328 296
329.org 0xa000 297 .data
330ENTRY(level3_physmem_pgt)
331 .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
332 298
333 .org 0xb000
334#ifdef CONFIG_ACPI_SLEEP 299#ifdef CONFIG_ACPI_SLEEP
300 .align PAGE_SIZE
335ENTRY(wakeup_level4_pgt) 301ENTRY(wakeup_level4_pgt)
336 .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 302 .quad phys_level3_ident_pgt | 0x007
337 .fill 255,8,0 303 .fill 255,8,0
338 .quad 0x000000000000a007 + __PHYSICAL_START 304 .quad phys_level3_physmem_pgt | 0x007
339 .fill 254,8,0 305 .fill 254,8,0
340 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 306 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
341 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 307 .quad phys_level3_kernel_pgt | 0x007
342#endif 308#endif
343 309
344#ifndef CONFIG_HOTPLUG_CPU 310#ifndef CONFIG_HOTPLUG_CPU
@@ -352,12 +318,12 @@ ENTRY(wakeup_level4_pgt)
352 */ 318 */
353 .align PAGE_SIZE 319 .align PAGE_SIZE
354ENTRY(boot_level4_pgt) 320ENTRY(boot_level4_pgt)
355 .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 321 .quad phys_level3_ident_pgt | 0x007
356 .fill 255,8,0 322 .fill 255,8,0
357 .quad 0x000000000000a007 + __PHYSICAL_START 323 .quad phys_level3_physmem_pgt | 0x007
358 .fill 254,8,0 324 .fill 254,8,0
359 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 325 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
360 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 326 .quad phys_level3_kernel_pgt | 0x007
361 327
362 .data 328 .data
363 329
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6eff51e9400c..8ac4db09610a 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -38,7 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); 38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
39 39
40unsigned long __supported_pte_mask __read_mostly = ~0UL; 40unsigned long __supported_pte_mask __read_mostly = ~0UL;
41static int do_not_nx __initdata = 0; 41static int do_not_nx __cpuinitdata = 0;
42 42
43/* noexec=on|off 43/* noexec=on|off
44Control non executable mappings for 64bit processes. 44Control non executable mappings for 64bit processes.
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
index 1d232a87f113..d25ac86fe27a 100644
--- a/arch/x86_64/mm/Makefile
+++ b/arch/x86_64/mm/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux x86_64-specific parts of the memory manager. 2# Makefile for the linux x86_64-specific parts of the memory manager.
3# 3#
4 4
5obj-y := init.o fault.o ioremap.o extable.o pageattr.o 5obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o
6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
7obj-$(CONFIG_NUMA) += numa.o 7obj-$(CONFIG_NUMA) += numa.o
8obj-$(CONFIG_K8_NUMA) += k8topology.o 8obj-$(CONFIG_K8_NUMA) += k8topology.o
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index eca60125efc3..7af1742aa958 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -24,6 +24,8 @@
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
27#include <linux/module.h>
28#include <linux/memory_hotplug.h>
27 29
28#include <asm/processor.h> 30#include <asm/processor.h>
29#include <asm/system.h> 31#include <asm/system.h>
@@ -180,13 +182,19 @@ static struct temp_map {
180 {} 182 {}
181}; 183};
182 184
183static __init void *alloc_low_page(int *index, unsigned long *phys) 185static __meminit void *alloc_low_page(int *index, unsigned long *phys)
184{ 186{
185 struct temp_map *ti; 187 struct temp_map *ti;
186 int i; 188 int i;
187 unsigned long pfn = table_end++, paddr; 189 unsigned long pfn = table_end++, paddr;
188 void *adr; 190 void *adr;
189 191
192 if (after_bootmem) {
193 adr = (void *)get_zeroed_page(GFP_ATOMIC);
194 *phys = __pa(adr);
195 return adr;
196 }
197
190 if (pfn >= end_pfn) 198 if (pfn >= end_pfn)
191 panic("alloc_low_page: ran out of memory"); 199 panic("alloc_low_page: ran out of memory");
192 for (i = 0; temp_mappings[i].allocated; i++) { 200 for (i = 0; temp_mappings[i].allocated; i++) {
@@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys)
199 ti->allocated = 1; 207 ti->allocated = 1;
200 __flush_tlb(); 208 __flush_tlb();
201 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); 209 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
210 memset(adr, 0, PAGE_SIZE);
202 *index = i; 211 *index = i;
203 *phys = pfn * PAGE_SIZE; 212 *phys = pfn * PAGE_SIZE;
204 return adr; 213 return adr;
205} 214}
206 215
207static __init void unmap_low_page(int i) 216static __meminit void unmap_low_page(int i)
208{ 217{
209 struct temp_map *ti = &temp_mappings[i]; 218 struct temp_map *ti;
219
220 if (after_bootmem)
221 return;
222
223 ti = &temp_mappings[i];
210 set_pmd(ti->pmd, __pmd(0)); 224 set_pmd(ti->pmd, __pmd(0));
211 ti->allocated = 0; 225 ti->allocated = 0;
212} 226}
213 227
214static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) 228static void __meminit
229phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
230{
231 int i;
232
233 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
234 unsigned long entry;
235
236 if (address > end) {
237 for (; i < PTRS_PER_PMD; i++, pmd++)
238 set_pmd(pmd, __pmd(0));
239 break;
240 }
241 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
242 entry &= __supported_pte_mask;
243 set_pmd(pmd, __pmd(entry));
244 }
245}
246
247static void __meminit
248phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
249{
250 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
251
252 if (pmd_none(*pmd)) {
253 spin_lock(&init_mm.page_table_lock);
254 phys_pmd_init(pmd, address, end);
255 spin_unlock(&init_mm.page_table_lock);
256 __flush_tlb_all();
257 }
258}
259
260static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
215{ 261{
216 long i, j; 262 long i = pud_index(address);
217 263
218 i = pud_index(address);
219 pud = pud + i; 264 pud = pud + i;
265
266 if (after_bootmem && pud_val(*pud)) {
267 phys_pmd_update(pud, address, end);
268 return;
269 }
270
220 for (; i < PTRS_PER_PUD; pud++, i++) { 271 for (; i < PTRS_PER_PUD; pud++, i++) {
221 int map; 272 int map;
222 unsigned long paddr, pmd_phys; 273 unsigned long paddr, pmd_phys;
223 pmd_t *pmd; 274 pmd_t *pmd;
224 275
225 paddr = address + i*PUD_SIZE; 276 paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
226 if (paddr >= end) { 277 if (paddr >= end)
227 for (; i < PTRS_PER_PUD; i++, pud++)
228 set_pud(pud, __pud(0));
229 break; 278 break;
230 }
231 279
232 if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { 280 if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
233 set_pud(pud, __pud(0)); 281 set_pud(pud, __pud(0));
234 continue; 282 continue;
235 } 283 }
236 284
237 pmd = alloc_low_page(&map, &pmd_phys); 285 pmd = alloc_low_page(&map, &pmd_phys);
286 spin_lock(&init_mm.page_table_lock);
238 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 287 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
239 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { 288 phys_pmd_init(pmd, paddr, end);
240 unsigned long pe; 289 spin_unlock(&init_mm.page_table_lock);
241
242 if (paddr >= end) {
243 for (; j < PTRS_PER_PMD; j++, pmd++)
244 set_pmd(pmd, __pmd(0));
245 break;
246 }
247 pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
248 pe &= __supported_pte_mask;
249 set_pmd(pmd, __pmd(pe));
250 }
251 unmap_low_page(map); 290 unmap_low_page(map);
252 } 291 }
253 __flush_tlb(); 292 __flush_tlb();
@@ -262,30 +301,25 @@ static void __init find_early_table_space(unsigned long end)
262 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + 301 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
263 round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 302 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
264 303
265 /* Put page tables beyond the DMA zones if possible. 304 /* RED-PEN putting page tables only on node 0 could
266 RED-PEN might be better to spread them out more over 305 cause a hotspot and fill up ZONE_DMA. The page tables
267 memory to avoid hotspots */ 306 need roughly 0.5KB per GB. */
268 if (end > MAX_DMA32_PFN<<PAGE_SHIFT) 307 start = 0x8000;
269 start = MAX_DMA32_PFN << PAGE_SHIFT; 308 table_start = find_e820_area(start, end, tables);
270 else if (end > MAX_DMA_PFN << PAGE_SHIFT)
271 start = MAX_DMA_PFN << PAGE_SHIFT;
272 else
273 start = 0x8000;
274
275 table_start = find_e820_area(start, end, tables);
276 if (table_start == -1)
277 table_start = find_e820_area(0x8000, end, tables);
278 if (table_start == -1UL) 309 if (table_start == -1UL)
279 panic("Cannot find space for the kernel page tables"); 310 panic("Cannot find space for the kernel page tables");
280 311
281 table_start >>= PAGE_SHIFT; 312 table_start >>= PAGE_SHIFT;
282 table_end = table_start; 313 table_end = table_start;
314
315 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
316 end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
283} 317}
284 318
285/* Setup the direct mapping of the physical memory at PAGE_OFFSET. 319/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
286 This runs before bootmem is initialized and gets pages directly from the 320 This runs before bootmem is initialized and gets pages directly from the
287 physical memory. To access them they are temporarily mapped. */ 321 physical memory. To access them they are temporarily mapped. */
288void __init init_memory_mapping(unsigned long start, unsigned long end) 322void __meminit init_memory_mapping(unsigned long start, unsigned long end)
289{ 323{
290 unsigned long next; 324 unsigned long next;
291 325
@@ -297,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
297 * mapped. Unfortunately this is done currently before the nodes are 331 * mapped. Unfortunately this is done currently before the nodes are
298 * discovered. 332 * discovered.
299 */ 333 */
300 find_early_table_space(end); 334 if (!after_bootmem)
335 find_early_table_space(end);
301 336
302 start = (unsigned long)__va(start); 337 start = (unsigned long)__va(start);
303 end = (unsigned long)__va(end); 338 end = (unsigned long)__va(end);
@@ -305,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
305 for (; start < end; start = next) { 340 for (; start < end; start = next) {
306 int map; 341 int map;
307 unsigned long pud_phys; 342 unsigned long pud_phys;
308 pud_t *pud = alloc_low_page(&map, &pud_phys); 343 pgd_t *pgd = pgd_offset_k(start);
344 pud_t *pud;
345
346 if (after_bootmem)
347 pud = pud_offset_k(pgd, __PAGE_OFFSET);
348 else
349 pud = alloc_low_page(&map, &pud_phys);
350
309 next = start + PGDIR_SIZE; 351 next = start + PGDIR_SIZE;
310 if (next > end) 352 if (next > end)
311 next = end; 353 next = end;
312 phys_pud_init(pud, __pa(start), __pa(next)); 354 phys_pud_init(pud, __pa(start), __pa(next));
313 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); 355 if (!after_bootmem)
356 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
314 unmap_low_page(map); 357 unmap_low_page(map);
315 } 358 }
316 359
317 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); 360 if (!after_bootmem)
361 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
318 __flush_tlb_all(); 362 __flush_tlb_all();
319 early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
320 table_start<<PAGE_SHIFT,
321 table_end<<PAGE_SHIFT);
322} 363}
323 364
324void __cpuinit zap_low_mappings(int cpu) 365void __cpuinit zap_low_mappings(int cpu)
@@ -393,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h,
393void __init paging_init(void) 434void __init paging_init(void)
394{ 435{
395 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; 436 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
437
438 memory_present(0, 0, end_pfn);
439 sparse_init();
396 size_zones(zones, holes, 0, end_pfn); 440 size_zones(zones, holes, 0, end_pfn);
397 free_area_init_node(0, NODE_DATA(0), zones, 441 free_area_init_node(0, NODE_DATA(0), zones,
398 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); 442 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
@@ -433,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
433 __flush_tlb_all(); 477 __flush_tlb_all();
434} 478}
435 479
480/*
481 * Memory hotplug specific functions
482 * These are only for non-NUMA machines right now.
483 */
484#ifdef CONFIG_MEMORY_HOTPLUG
485
486void online_page(struct page *page)
487{
488 ClearPageReserved(page);
489 set_page_count(page, 1);
490 __free_page(page);
491 totalram_pages++;
492 num_physpages++;
493}
494
495int add_memory(u64 start, u64 size)
496{
497 struct pglist_data *pgdat = NODE_DATA(0);
498 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
499 unsigned long start_pfn = start >> PAGE_SHIFT;
500 unsigned long nr_pages = size >> PAGE_SHIFT;
501 int ret;
502
503 ret = __add_pages(zone, start_pfn, nr_pages);
504 if (ret)
505 goto error;
506
507 init_memory_mapping(start, (start + size -1));
508
509 return ret;
510error:
511 printk("%s: Problem encountered in __add_pages!\n", __func__);
512 return ret;
513}
514EXPORT_SYMBOL_GPL(add_memory);
515
516int remove_memory(u64 start, u64 size)
517{
518 return -EINVAL;
519}
520EXPORT_SYMBOL_GPL(remove_memory);
521
522#endif
523
436static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, 524static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
437 kcore_vsyscall; 525 kcore_vsyscall;
438 526
@@ -539,7 +627,7 @@ void mark_rodata_ro(void)
539#ifdef CONFIG_BLK_DEV_INITRD 627#ifdef CONFIG_BLK_DEV_INITRD
540void free_initrd_mem(unsigned long start, unsigned long end) 628void free_initrd_mem(unsigned long start, unsigned long end)
541{ 629{
542 if (start < (unsigned long)&_end) 630 if (start >= end)
543 return; 631 return;
544 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); 632 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
545 for (; start < end; start += PAGE_SIZE) { 633 for (; start < end; start += PAGE_SIZE) {
diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c
new file mode 100644
index 000000000000..43e9b99bdf25
--- /dev/null
+++ b/arch/x86_64/mm/mmap.c
@@ -0,0 +1,30 @@
1/* Copyright 2005 Andi Kleen, SuSE Labs.
2 * Licensed under GPL, v.2
3 */
4#include <linux/config.h>
5#include <linux/mm.h>
6#include <linux/sched.h>
7#include <linux/random.h>
8#include <asm/ia32.h>
9
10/* Notebook: move the mmap code from sys_x86_64.c over here. */
11
12void arch_pick_mmap_layout(struct mm_struct *mm)
13{
14#ifdef CONFIG_IA32_EMULATION
15 if (current_thread_info()->flags & _TIF_IA32)
16 return ia32_pick_mmap_layout(mm);
17#endif
18 mm->mmap_base = TASK_UNMAPPED_BASE;
19 if (current->flags & PF_RANDOMIZE) {
20 /* Add 28bit randomness which is about 40bits of address space
21 because mmap base has to be page aligned.
22 or ~1/128 of the total user VM
23 (total user address space is 47bits) */
24 unsigned rnd = get_random_int() & 0xfffffff;
25 mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
26 }
27 mm->get_unmapped_area = arch_get_unmapped_area;
28 mm->unmap_area = arch_unmap_area;
29}
30