aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorJeff Dike <jdike@addtoit.com>2008-02-08 07:22:07 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-08 12:22:42 -0500
commit536788fe2d28e11db6aeda74207d95d750fb761f (patch)
tree73df2d3a46c542c71d3a84c20c8fd1ce617386a3 /arch
parent2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 (diff)
uml: runtime host VMSPLIT detection
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is needed on i386 if UML is to run on hosts with varying VMSPLITs without recompilation. TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of an include of pgtable.h from processor.h, which can cause include loops. On i386, task_size is calculated early in boot by probing the address space in a binary search to figure out where the boundary between usable and non-usable memory is. This tries to make sure that a page that is considered to be in userspace is, or can be made, read-write. I'm concerned about a system-global VDSO page in kernel memory being hit and considered to be a userspace page. On x86_64, task_size is just the old value of CONFIG_TOP_ADDR. A bunch of config variables are gone now. CONFIG_TOP_ADDR is directly replaced by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in arch/um/Makefile are also gone. I noticed and fixed a missing extern in os.h when adding os_get_task_size. Note: This has been revised to fix the 32-bit UML on 64-bit host bug that Miklos ran into. Signed-off-by: Jeff Dike <jdike@linux.intel.com> Cc: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/um/Kconfig11
-rw-r--r--arch/um/Kconfig.i38637
-rw-r--r--arch/um/Kconfig.x86_644
-rw-r--r--arch/um/Makefile11
-rw-r--r--arch/um/defconfig3
-rw-r--r--arch/um/include/as-layout.h2
-rw-r--r--arch/um/include/os.h5
-rw-r--r--arch/um/kernel/exec.c2
-rw-r--r--arch/um/kernel/um_arch.c16
-rw-r--r--arch/um/os-Linux/sys-i386/Makefile2
-rw-r--r--arch/um/os-Linux/sys-i386/task_size.c120
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile2
-rw-r--r--arch/um/os-Linux/sys-x86_64/task_size.c5
13 files changed, 148 insertions, 72 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 99e51d059a02..dba8e05f0287 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -203,17 +203,6 @@ config NR_CPUS
203 depends on SMP 203 depends on SMP
204 default "32" 204 default "32"
205 205
206config NEST_LEVEL
207 int "Nesting level"
208 default "0"
209 help
210 This is set to the number of layers of UMLs that this UML will be run
211 in. Normally, this is zero, meaning that it will run directly on the
212 host. Setting it to one will build a UML that can run inside a UML
213 that is running on the host. Generally, if you intend this UML to run
214 inside another UML, set CONFIG_NEST_LEVEL to one more than the host
215 UML.
216
217config HIGHMEM 206config HIGHMEM
218 bool "Highmem support (EXPERIMENTAL)" 207 bool "Highmem support (EXPERIMENTAL)"
219 depends on !64BIT && EXPERIMENTAL 208 depends on !64BIT && EXPERIMENTAL
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index e75264603d24..3cd8a04d66d8 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -23,43 +23,6 @@ config SEMAPHORE_SLEEPERS
23 bool 23 bool
24 default y 24 default y
25 25
26choice
27 prompt "Host memory split"
28 default HOST_VMSPLIT_3G
29 help
30 This is needed when the host kernel on which you run has a non-default
31 (like 2G/2G) memory split, instead of the customary 3G/1G. If you did
32 not recompile your own kernel but use the default distro's one, you can
33 safely accept the "Default split" option.
34
35 It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via
36 CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck
37 patchset by Con Kolivas, or other ones) - option names match closely the
38 host CONFIG_VM_SPLIT_* ones.
39
40 A lower setting (where 1G/3G is lowest and 3G/1G is higher) will
41 tolerate even more "normal" host kernels, but an higher setting will be
42 stricter.
43
44 So, if you do not know what to do here, say 'Default split'.
45
46config HOST_VMSPLIT_3G
47 bool "Default split (3G/1G user/kernel host split)"
48config HOST_VMSPLIT_3G_OPT
49 bool "3G/1G user/kernel host split (for full 1G low memory)"
50config HOST_VMSPLIT_2G
51 bool "2G/2G user/kernel host split"
52config HOST_VMSPLIT_1G
53 bool "1G/3G user/kernel host split"
54endchoice
55
56config TOP_ADDR
57 hex
58 default 0xB0000000 if HOST_VMSPLIT_3G_OPT
59 default 0x78000000 if HOST_VMSPLIT_2G
60 default 0x40000000 if HOST_VMSPLIT_1G
61 default 0xC0000000
62
63config 3_LEVEL_PGTABLES 26config 3_LEVEL_PGTABLES
64 bool "Three-level pagetables (EXPERIMENTAL)" 27 bool "Three-level pagetables (EXPERIMENTAL)"
65 default n 28 default n
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index b438f0e14271..6533b349f061 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -15,10 +15,6 @@ config SEMAPHORE_SLEEPERS
15 bool 15 bool
16 default y 16 default y
17 17
18config TOP_ADDR
19 hex
20 default 0x7fc0000000
21
22config 3_LEVEL_PGTABLES 18config 3_LEVEL_PGTABLES
23 bool 19 bool
24 default y 20 default y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index cb4af9bf2074..dbeab15e7bb7 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -79,13 +79,6 @@ KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \
79KBUILD_CFLAGS += $(KERNEL_DEFINES) 79KBUILD_CFLAGS += $(KERNEL_DEFINES)
80KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,) 80KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
81 81
82# These are needed for clean and mrproper, since in that case .config is not
83# included; the values here are meaningless
84
85CONFIG_NEST_LEVEL ?= 0
86
87SIZE = ($(CONFIG_NEST_LEVEL) * 0x20000000)
88
89PHONY += linux 82PHONY += linux
90 83
91all: linux 84all: linux
@@ -120,10 +113,6 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
120CONFIG_KERNEL_STACK_ORDER ?= 2 113CONFIG_KERNEL_STACK_ORDER ?= 2
121STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) 114STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
122 115
123ifndef START
124 START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
125endif
126
127CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ 116CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
128 -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE) 117 -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
129 118
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 86db2862f222..59215bc264ef 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -56,8 +56,6 @@ CONFIG_X86_TSC=y
56CONFIG_UML_X86=y 56CONFIG_UML_X86=y
57# CONFIG_64BIT is not set 57# CONFIG_64BIT is not set
58CONFIG_SEMAPHORE_SLEEPERS=y 58CONFIG_SEMAPHORE_SLEEPERS=y
59# CONFIG_HOST_2G_2G is not set
60CONFIG_TOP_ADDR=0xc0000000
61# CONFIG_3_LEVEL_PGTABLES is not set 59# CONFIG_3_LEVEL_PGTABLES is not set
62CONFIG_ARCH_HAS_SC_SIGNALS=y 60CONFIG_ARCH_HAS_SC_SIGNALS=y
63CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y 61CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
@@ -81,7 +79,6 @@ CONFIG_HOSTFS=y
81# CONFIG_HPPFS is not set 79# CONFIG_HPPFS is not set
82CONFIG_MCONSOLE=y 80CONFIG_MCONSOLE=y
83CONFIG_MAGIC_SYSRQ=y 81CONFIG_MAGIC_SYSRQ=y
84CONFIG_NEST_LEVEL=0
85# CONFIG_HIGHMEM is not set 82# CONFIG_HIGHMEM is not set
86CONFIG_KERNEL_STACK_ORDER=0 83CONFIG_KERNEL_STACK_ORDER=0
87 84
diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h
index 606bb5c7fdf6..cac542d8ff70 100644
--- a/arch/um/include/as-layout.h
+++ b/arch/um/include/as-layout.h
@@ -57,6 +57,8 @@ extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end;
57extern unsigned long _unprotected_end; 57extern unsigned long _unprotected_end;
58extern unsigned long brk_start; 58extern unsigned long brk_start;
59 59
60extern unsigned long host_task_size;
61
60extern int linux_main(int argc, char **argv); 62extern int linux_main(int argc, char **argv);
61 63
62extern void (*sig_info[])(int, struct uml_pt_regs *); 64extern void (*sig_info[])(int, struct uml_pt_regs *);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 0b6b62733303..32c799e3a495 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -295,6 +295,9 @@ extern void maybe_sigio_broken(int fd, int read);
295extern int os_arch_prctl(int pid, int code, unsigned long *addr); 295extern int os_arch_prctl(int pid, int code, unsigned long *addr);
296 296
297/* tty.c */ 297/* tty.c */
298int get_pty(void); 298extern int get_pty(void);
299
300/* sys-$ARCH/task_size.c */
301extern unsigned long os_get_task_size(void);
299 302
300#endif 303#endif
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 76a62c0cb2bc..f5d7f4569ba7 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -25,7 +25,7 @@ void flush_thread(void)
25 25
26 ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data); 26 ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
27 ret = ret || unmap(&current->mm->context.id, STUB_END, 27 ret = ret || unmap(&current->mm->context.id, STUB_END,
28 TASK_SIZE - STUB_END, 1, &data); 28 host_task_size - STUB_END, 1, &data);
29 if (ret) { 29 if (ret) {
30 printk(KERN_ERR "flush_thread - clearing address space failed, " 30 printk(KERN_ERR "flush_thread - clearing address space failed, "
31 "err = %d\n", ret); 31 "err = %d\n", ret);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 468aba990dbd..a6c1dd1cf5a1 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -241,6 +241,11 @@ static struct notifier_block panic_exit_notifier = {
241}; 241};
242 242
243/* Set during early boot */ 243/* Set during early boot */
244unsigned long task_size;
245EXPORT_SYMBOL(task_size);
246
247unsigned long host_task_size;
248
244unsigned long brk_start; 249unsigned long brk_start;
245unsigned long end_iomem; 250unsigned long end_iomem;
246EXPORT_SYMBOL(end_iomem); 251EXPORT_SYMBOL(end_iomem);
@@ -267,6 +272,13 @@ int __init linux_main(int argc, char **argv)
267 if (have_root == 0) 272 if (have_root == 0)
268 add_arg(DEFAULT_COMMAND_LINE); 273 add_arg(DEFAULT_COMMAND_LINE);
269 274
275 host_task_size = os_get_task_size();
276 /*
277 * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
278 * out
279 */
280 task_size = host_task_size & PGDIR_MASK;
281
270 /* OS sanity checks that need to happen before the kernel runs */ 282 /* OS sanity checks that need to happen before the kernel runs */
271 os_early_checks(); 283 os_early_checks();
272 284
@@ -303,7 +315,7 @@ int __init linux_main(int argc, char **argv)
303 315
304 highmem = 0; 316 highmem = 0;
305 iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; 317 iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
306 max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC; 318 max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
307 319
308 /* 320 /*
309 * Zones have to begin on a 1 << MAX_ORDER page boundary, 321 * Zones have to begin on a 1 << MAX_ORDER page boundary,
@@ -335,7 +347,7 @@ int __init linux_main(int argc, char **argv)
335 } 347 }
336 348
337 virtmem_size = physmem_size; 349 virtmem_size = physmem_size;
338 avail = CONFIG_TOP_ADDR - start_vm; 350 avail = TASK_SIZE - start_vm;
339 if (physmem_size > avail) 351 if (physmem_size > avail)
340 virtmem_size = avail; 352 virtmem_size = avail;
341 end_vm = start_vm + virtmem_size; 353 end_vm = start_vm + virtmem_size;
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index a841262c594a..b4bc6ac4f30b 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-y = registers.o signal.o tls.o 6obj-y = registers.o signal.o task_size.o tls.o
7 7
8USER_OBJS := $(obj-y) 8USER_OBJS := $(obj-y)
9 9
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
new file mode 100644
index 000000000000..48d211b3d9a1
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -0,0 +1,120 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <signal.h>
4#include <sys/mman.h>
5#include "longjmp.h"
6#include "kern_constants.h"
7
/*
 * Jump target for the probing code: whoever is about to touch a
 * possibly-invalid address arms this with setjmp() first.
 */
static jmp_buf buf;

/*
 * SIGSEGV handler installed while the address space is being probed.
 * It never returns normally; it unwinds to the most recent setjmp(buf)
 * with a non-zero value so the prober knows the access faulted.
 */
static void segfault(int sig)
{
	(void) sig;	/* the signal number is not needed */

	longjmp(buf, 1);
}
14
15static int page_ok(unsigned long page)
16{
17 unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
18 unsigned long n = ~0UL;
19 void *mapped = NULL;
20 int ok = 0;
21
22 /*
23 * First see if the page is readable. If it is, it may still
24 * be a VDSO, so we go on to see if it's writable. If not
25 * then try mapping memory there. If that fails, then we're
26 * still in the kernel area. As a sanity check, we'll fail if
27 * the mmap succeeds, but gives us an address different from
28 * what we wanted.
29 */
30 if (setjmp(buf) == 0)
31 n = *address;
32 else {
33 mapped = mmap(address, UM_KERN_PAGE_SIZE,
34 PROT_READ | PROT_WRITE,
35 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
36 if (mapped == MAP_FAILED)
37 return 0;
38 if (mapped != address)
39 goto out;
40 }
41
42 /*
43 * Now, is it writeable? If so, then we're in user address
44 * space. If not, then try mprotecting it and try the write
45 * again.
46 */
47 if (setjmp(buf) == 0) {
48 *address = n;
49 ok = 1;
50 goto out;
51 } else if (mprotect(address, UM_KERN_PAGE_SIZE,
52 PROT_READ | PROT_WRITE) != 0)
53 goto out;
54
55 if (setjmp(buf) == 0) {
56 *address = n;
57 ok = 1;
58 }
59
60 out:
61 if (mapped != NULL)
62 munmap(mapped, UM_KERN_PAGE_SIZE);
63 return ok;
64}
65
66unsigned long os_get_task_size(void)
67{
68 struct sigaction sa, old;
69 unsigned long bottom = 0;
70 /*
71 * A 32-bit UML on a 64-bit host gets confused about the VDSO at
72 * 0xffffe000. It is mapped, is readable, can be reprotected writeable
73 * and written. However, exec discovers later that it can't be
74 * unmapped. So, just set the highest address to be checked to just
75 * below it. This might waste some address space on 4G/4G 32-bit
76 * hosts, but shouldn't hurt otherwise.
77 */
78 unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
79 unsigned long test;
80
81 printf("Locating the top of the address space ... ");
82 fflush(stdout);
83
84 /*
85 * We're going to be longjmping out of the signal handler, so
86 * SA_DEFER needs to be set.
87 */
88 sa.sa_handler = segfault;
89 sigemptyset(&sa.sa_mask);
90 sa.sa_flags = SA_NODEFER;
91 sigaction(SIGSEGV, &sa, &old);
92
93 if (!page_ok(bottom)) {
94 fprintf(stderr, "Address 0x%x no good?\n",
95 bottom << UM_KERN_PAGE_SHIFT);
96 exit(1);
97 }
98
99 /* This could happen with a 4G/4G split */
100 if (page_ok(top))
101 goto out;
102
103 do {
104 test = bottom + (top - bottom) / 2;
105 if (page_ok(test))
106 bottom = test;
107 else
108 top = test;
109 } while (top - bottom > 1);
110
111out:
112 /* Restore the old SIGSEGV handling */
113 sigaction(SIGSEGV, &old, NULL);
114
115 top <<= UM_KERN_PAGE_SHIFT;
116 printf("0x%x\n", top);
117 fflush(stdout);
118
119 return top;
120}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index a42a4ef02e1e..a44a47f8f57b 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-y = registers.o prctl.o signal.o 6obj-y = registers.o prctl.o signal.o task_size.o
7 7
8USER_OBJS := $(obj-y) 8USER_OBJS := $(obj-y)
9 9
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
new file mode 100644
index 000000000000..fad6f57f8ee3
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/task_size.c
@@ -0,0 +1,5 @@
/*
 * os_get_task_size - x86_64 version: no probing, the task size is fixed.
 *
 * Takes no arguments, matching the prototype in os.h and the call in
 * linux_main().
 *
 * Returns the old value of CONFIG_TOP_ADDR.
 */
unsigned long os_get_task_size(void)
{
	/* The old value of CONFIG_TOP_ADDR */
	return 0x7fc0000000UL;
}