diff options
author | Jeff Dike <jdike@addtoit.com> | 2008-02-08 07:22:07 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-08 12:22:42 -0500 |
commit | 536788fe2d28e11db6aeda74207d95d750fb761f (patch) | |
tree | 73df2d3a46c542c71d3a84c20c8fd1ce617386a3 /arch/um/os-Linux | |
parent | 2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 (diff) |
uml: runtime host VMSPLIT detection
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is
needed on i386 if UML is to run on hosts with varying VMSPLITs without
recompilation.
TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of
an include of pgtable.h from processor.h, which can cause include loops.
On i386, task_size is calculated early in boot by probing the address space in
a binary search to figure out where the boundary between usable and non-usable
memory is. This tries to make sure that a page that is considered to be in
userspace is, or can be made, read-write. I'm concerned about a system-global
VDSO page in kernel memory being hit and considered to be a userspace page.
On x86_64, task_size is just the old value of CONFIG_TOP_ADDR.
A bunch of config variables are gone now. CONFIG_TOP_ADDR is directly replaced
by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it
irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in
arch/um/Makefile are also gone.
I noticed and fixed a missing extern in os.h when adding os_get_task_size.
Note: This has been revised to fix the 32-bit UML on 64-bit host bug that
Miklos ran into.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r-- | arch/um/os-Linux/sys-i386/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-i386/task_size.c | 120 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-x86_64/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-x86_64/task_size.c | 5 |
4 files changed, 127 insertions, 2 deletions
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile index a841262c594a..b4bc6ac4f30b 100644 --- a/arch/um/os-Linux/sys-i386/Makefile +++ b/arch/um/os-Linux/sys-i386/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # Licensed under the GPL | 3 | # Licensed under the GPL |
4 | # | 4 | # |
5 | 5 | ||
6 | obj-y = registers.o signal.o tls.o | 6 | obj-y = registers.o signal.o task_size.o tls.o |
7 | 7 | ||
8 | USER_OBJS := $(obj-y) | 8 | USER_OBJS := $(obj-y) |
9 | 9 | ||
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c new file mode 100644 index 000000000000..48d211b3d9a1 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/task_size.c | |||
@@ -0,0 +1,120 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <signal.h> | ||
4 | #include <sys/mman.h> | ||
5 | #include "longjmp.h" | ||
6 | #include "kern_constants.h" | ||
7 | |||
/* Jump context that segfault() returns to; armed with setjmp() before each probe. */
static jmp_buf buf;

/*
 * SIGSEGV handler used while probing the address space.
 *
 * It never returns normally: control is transferred back to the most
 * recent setjmp(buf), which then sees a return value of 1.
 */
static void segfault(int sig)
{
	/* The signal number is not needed; only the fact that we faulted matters. */
	(void) sig;

	longjmp(buf, 1);
}
14 | |||
15 | static int page_ok(unsigned long page) | ||
16 | { | ||
17 | unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); | ||
18 | unsigned long n = ~0UL; | ||
19 | void *mapped = NULL; | ||
20 | int ok = 0; | ||
21 | |||
22 | /* | ||
23 | * First see if the page is readable. If it is, it may still | ||
24 | * be a VDSO, so we go on to see if it's writable. If not | ||
25 | * then try mapping memory there. If that fails, then we're | ||
26 | * still in the kernel area. As a sanity check, we'll fail if | ||
27 | * the mmap succeeds, but gives us an address different from | ||
28 | * what we wanted. | ||
29 | */ | ||
30 | if (setjmp(buf) == 0) | ||
31 | n = *address; | ||
32 | else { | ||
33 | mapped = mmap(address, UM_KERN_PAGE_SIZE, | ||
34 | PROT_READ | PROT_WRITE, | ||
35 | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
36 | if (mapped == MAP_FAILED) | ||
37 | return 0; | ||
38 | if (mapped != address) | ||
39 | goto out; | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Now, is it writeable? If so, then we're in user address | ||
44 | * space. If not, then try mprotecting it and try the write | ||
45 | * again. | ||
46 | */ | ||
47 | if (setjmp(buf) == 0) { | ||
48 | *address = n; | ||
49 | ok = 1; | ||
50 | goto out; | ||
51 | } else if (mprotect(address, UM_KERN_PAGE_SIZE, | ||
52 | PROT_READ | PROT_WRITE) != 0) | ||
53 | goto out; | ||
54 | |||
55 | if (setjmp(buf) == 0) { | ||
56 | *address = n; | ||
57 | ok = 1; | ||
58 | } | ||
59 | |||
60 | out: | ||
61 | if (mapped != NULL) | ||
62 | munmap(mapped, UM_KERN_PAGE_SIZE); | ||
63 | return ok; | ||
64 | } | ||
65 | |||
66 | unsigned long os_get_task_size(void) | ||
67 | { | ||
68 | struct sigaction sa, old; | ||
69 | unsigned long bottom = 0; | ||
70 | /* | ||
71 | * A 32-bit UML on a 64-bit host gets confused about the VDSO at | ||
72 | * 0xffffe000. It is mapped, is readable, can be reprotected writeable | ||
73 | * and written. However, exec discovers later that it can't be | ||
74 | * unmapped. So, just set the highest address to be checked to just | ||
75 | * below it. This might waste some address space on 4G/4G 32-bit | ||
76 | * hosts, but shouldn't hurt otherwise. | ||
77 | */ | ||
78 | unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; | ||
79 | unsigned long test; | ||
80 | |||
81 | printf("Locating the top of the address space ... "); | ||
82 | fflush(stdout); | ||
83 | |||
84 | /* | ||
85 | * We're going to be longjmping out of the signal handler, so | ||
86 | * SA_DEFER needs to be set. | ||
87 | */ | ||
88 | sa.sa_handler = segfault; | ||
89 | sigemptyset(&sa.sa_mask); | ||
90 | sa.sa_flags = SA_NODEFER; | ||
91 | sigaction(SIGSEGV, &sa, &old); | ||
92 | |||
93 | if (!page_ok(bottom)) { | ||
94 | fprintf(stderr, "Address 0x%x no good?\n", | ||
95 | bottom << UM_KERN_PAGE_SHIFT); | ||
96 | exit(1); | ||
97 | } | ||
98 | |||
99 | /* This could happen with a 4G/4G split */ | ||
100 | if (page_ok(top)) | ||
101 | goto out; | ||
102 | |||
103 | do { | ||
104 | test = bottom + (top - bottom) / 2; | ||
105 | if (page_ok(test)) | ||
106 | bottom = test; | ||
107 | else | ||
108 | top = test; | ||
109 | } while (top - bottom > 1); | ||
110 | |||
111 | out: | ||
112 | /* Restore the old SIGSEGV handling */ | ||
113 | sigaction(SIGSEGV, &old, NULL); | ||
114 | |||
115 | top <<= UM_KERN_PAGE_SHIFT; | ||
116 | printf("0x%x\n", top); | ||
117 | fflush(stdout); | ||
118 | |||
119 | return top; | ||
120 | } | ||
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile index a42a4ef02e1e..a44a47f8f57b 100644 --- a/arch/um/os-Linux/sys-x86_64/Makefile +++ b/arch/um/os-Linux/sys-x86_64/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # Licensed under the GPL | 3 | # Licensed under the GPL |
4 | # | 4 | # |
5 | 5 | ||
6 | obj-y = registers.o prctl.o signal.o | 6 | obj-y = registers.o prctl.o signal.o task_size.o |
7 | 7 | ||
8 | USER_OBJS := $(obj-y) | 8 | USER_OBJS := $(obj-y) |
9 | 9 | ||
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c new file mode 100644 index 000000000000..fad6f57f8ee3 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/task_size.c | |||
@@ -0,0 +1,5 @@ | |||
/*
 * x86_64 variant: no probing is done, a fixed task size is reported.
 *
 * shift is accepted but not used.
 * NOTE(review): the i386 variant of this function takes no arguments;
 * confirm which signature the shared declaration expects.
 */
unsigned long os_get_task_size(unsigned long shift)
{
	/* The old value of CONFIG_TOP_ADDR */
	const unsigned long top_addr = 0x7fc0000000;

	(void) shift;

	return top_addr;
}