aboutsummaryrefslogtreecommitdiffstats
path: root/arch/um/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/kernel')
-rw-r--r--arch/um/kernel/Makefile15
-rw-r--r--arch/um/kernel/irq.c41
-rw-r--r--arch/um/kernel/ksyms.c19
-rw-r--r--arch/um/kernel/main.c2
-rw-r--r--arch/um/kernel/process.c439
-rw-r--r--arch/um/kernel/skas/Makefile2
-rw-r--r--arch/um/kernel/skas/include/mmu-skas.h4
-rw-r--r--arch/um/kernel/skas/include/skas.h30
-rw-r--r--arch/um/kernel/skas/mem_user.c224
-rw-r--r--arch/um/kernel/skas/mmu.c61
-rw-r--r--arch/um/kernel/skas/process.c69
-rw-r--r--arch/um/kernel/skas/process_kern.c7
-rw-r--r--arch/um/kernel/skas/syscall.c50
-rw-r--r--arch/um/kernel/skas/syscall_kern.c43
-rw-r--r--arch/um/kernel/skas/syscall_user.c44
-rw-r--r--arch/um/kernel/skas/tlb.c28
-rw-r--r--arch/um/kernel/syscall.c36
-rw-r--r--arch/um/kernel/syscall_user.c48
-rw-r--r--arch/um/kernel/tlb.c267
-rw-r--r--arch/um/kernel/trap_kern.c55
-rw-r--r--arch/um/kernel/trap_user.c21
-rw-r--r--arch/um/kernel/tt/syscall_kern.c47
-rw-r--r--arch/um/kernel/tt/syscall_user.c35
-rw-r--r--arch/um/kernel/tt/tlb.c26
-rw-r--r--arch/um/kernel/um_arch.c8
25 files changed, 632 insertions, 989 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index a8918e80df96..614b8ebeb0ed 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -8,25 +8,24 @@ clean-files :=
8 8
9obj-y = config.o exec_kern.o exitcode.o \ 9obj-y = config.o exec_kern.o exitcode.o \
10 helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ 10 helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \
11 physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ 11 physmem.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \
12 sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ 12 sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o \
13 syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \ 13 tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \
14 tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ 14 uaccess_user.o um_arch.o umid.o user_util.o
15 user_util.o
16 15
17obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o 16obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
18obj-$(CONFIG_GPROF) += gprof_syms.o 17obj-$(CONFIG_GPROF) += gprof_syms.o
19obj-$(CONFIG_GCOV) += gmon_syms.o 18obj-$(CONFIG_GCOV) += gmon_syms.o
20obj-$(CONFIG_TTY_LOG) += tty_log.o 19obj-$(CONFIG_TTY_LOG) += tty_log.o
21obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o 20obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o
22 21
23obj-$(CONFIG_MODE_TT) += tt/ 22obj-$(CONFIG_MODE_TT) += tt/
24obj-$(CONFIG_MODE_SKAS) += skas/ 23obj-$(CONFIG_MODE_SKAS) += skas/
25 24
26user-objs-$(CONFIG_TTY_LOG) += tty_log.o 25user-objs-$(CONFIG_TTY_LOG) += tty_log.o
27 26
28USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ 27USER_OBJS := $(user-objs-y) config.o helper.o main.o tempfile.o time.o \
29 time.o tty_log.o umid.o user_util.o 28 tty_log.o umid.o user_util.o
30 29
31include arch/um/scripts/Makefile.rules 30include arch/um/scripts/Makefile.rules
32 31
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9f18061ef4c9..dcd814971995 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -31,7 +31,7 @@
31#include "kern_util.h" 31#include "kern_util.h"
32#include "irq_user.h" 32#include "irq_user.h"
33#include "irq_kern.h" 33#include "irq_kern.h"
34 34#include "os.h"
35 35
36/* 36/*
37 * Generic, controller-independent functions: 37 * Generic, controller-independent functions:
@@ -168,13 +168,32 @@ void __init init_IRQ(void)
168 } 168 }
169} 169}
170 170
171/* 171int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
172 * Overrides for Emacs so that we follow Linus's tabbing style. 172 struct pt_regs *))
173 * Emacs will notice this stuff at the end of the file and automatically 173{
174 * adjust the settings for this buffer only. This must remain at the end 174 int fds[2], err;
175 * of the file. 175
176 * --------------------------------------------------------------------------- 176 err = os_pipe(fds, 1, 1);
177 * Local variables: 177 if(err){
178 * c-file-style: "linux" 178 printk("init_aio_irq - os_pipe failed, err = %d\n", -err);
179 * End: 179 goto out;
180 */ 180 }
181
182 err = um_request_irq(irq, fds[0], IRQ_READ, handler,
183 SA_INTERRUPT | SA_SAMPLE_RANDOM, name,
184 (void *) (long) fds[0]);
185 if(err){
186 printk("init_aio_irq - : um_request_irq failed, err = %d\n",
187 err);
188 goto out_close;
189 }
190
191 err = fds[1];
192 goto out;
193
194 out_close:
195 os_close_file(fds[0]);
196 os_close_file(fds[1]);
197 out:
198 return(err);
199}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 99439fa15ef4..32d3076dd220 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -114,22 +114,3 @@ extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
114EXPORT_SYMBOL(__read_lock_failed); 114EXPORT_SYMBOL(__read_lock_failed);
115 115
116#endif 116#endif
117
118#ifdef CONFIG_HIGHMEM
119EXPORT_SYMBOL(kmap);
120EXPORT_SYMBOL(kunmap);
121EXPORT_SYMBOL(kmap_atomic);
122EXPORT_SYMBOL(kunmap_atomic);
123EXPORT_SYMBOL(kmap_atomic_to_page);
124#endif
125
126/*
127 * Overrides for Emacs so that we follow Linus's tabbing style.
128 * Emacs will notice this stuff at the end of the file and automatically
129 * adjust the settings for this buffer only. This must remain at the end
130 * of the file.
131 * ---------------------------------------------------------------------------
132 * Local variables:
133 * c-file-style: "linux"
134 * End:
135 */
diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c
index 1e1a87f1c510..d31027f0fe39 100644
--- a/arch/um/kernel/main.c
+++ b/arch/um/kernel/main.c
@@ -97,7 +97,7 @@ int main(int argc, char **argv, char **envp)
97 exit(1); 97 exit(1);
98 } 98 }
99 99
100#ifdef UML_CONFIG_MODE_TT 100#ifdef UML_CONFIG_CMDLINE_ON_HOST
101 /* Allocate memory for thread command lines */ 101 /* Allocate memory for thread command lines */
102 if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ 102 if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){
103 103
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
deleted file mode 100644
index 67acd92c5322..000000000000
--- a/arch/um/kernel/process.c
+++ /dev/null
@@ -1,439 +0,0 @@
1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include <stdio.h>
7#include <unistd.h>
8#include <signal.h>
9#include <sched.h>
10#include <errno.h>
11#include <stdarg.h>
12#include <stdlib.h>
13#include <setjmp.h>
14#include <sys/time.h>
15#include <sys/wait.h>
16#include <sys/mman.h>
17#include <asm/unistd.h>
18#include <asm/page.h>
19#include "user_util.h"
20#include "kern_util.h"
21#include "user.h"
22#include "process.h"
23#include "signal_kern.h"
24#include "signal_user.h"
25#include "sysdep/ptrace.h"
26#include "sysdep/sigcontext.h"
27#include "irq_user.h"
28#include "ptrace_user.h"
29#include "time_user.h"
30#include "init.h"
31#include "os.h"
32#include "uml-config.h"
33#include "choose-mode.h"
34#include "mode.h"
35#include "tempfile.h"
36#ifdef UML_CONFIG_MODE_SKAS
37#include "skas.h"
38#include "skas_ptrace.h"
39#include "registers.h"
40#endif
41
42void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
43{
44 int flags = 0, pages;
45
46 if(sig_stack != NULL){
47 pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER);
48 set_sigstack(sig_stack, pages * page_size());
49 flags = SA_ONSTACK;
50 }
51 if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1);
52}
53
54void init_new_thread_signals(int altstack)
55{
56 int flags = altstack ? SA_ONSTACK : 0;
57
58 set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags,
59 SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
60 set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags,
61 SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
62 set_handler(SIGFPE, (__sighandler_t) sig_handler, flags,
63 SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
64 set_handler(SIGILL, (__sighandler_t) sig_handler, flags,
65 SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
66 set_handler(SIGBUS, (__sighandler_t) sig_handler, flags,
67 SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
68 set_handler(SIGUSR2, (__sighandler_t) sig_handler,
69 flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
70 signal(SIGHUP, SIG_IGN);
71
72 init_irq_signals(altstack);
73}
74
75struct tramp {
76 int (*tramp)(void *);
77 void *tramp_data;
78 unsigned long temp_stack;
79 int flags;
80 int pid;
81};
82
83/* See above for why sigkill is here */
84
85int sigkill = SIGKILL;
86
87int outer_tramp(void *arg)
88{
89 struct tramp *t;
90 int sig = sigkill;
91
92 t = arg;
93 t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2,
94 t->flags, t->tramp_data);
95 if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL);
96 kill(os_getpid(), sig);
97 _exit(0);
98}
99
100int start_fork_tramp(void *thread_arg, unsigned long temp_stack,
101 int clone_flags, int (*tramp)(void *))
102{
103 struct tramp arg;
104 unsigned long sp;
105 int new_pid, status, err;
106
107 /* The trampoline will run on the temporary stack */
108 sp = stack_sp(temp_stack);
109
110 clone_flags |= CLONE_FILES | SIGCHLD;
111
112 arg.tramp = tramp;
113 arg.tramp_data = thread_arg;
114 arg.temp_stack = temp_stack;
115 arg.flags = clone_flags;
116
117 /* Start the process and wait for it to kill itself */
118 new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg);
119 if(new_pid < 0)
120 return(new_pid);
121
122 CATCH_EINTR(err = waitpid(new_pid, &status, 0));
123 if(err < 0)
124 panic("Waiting for outer trampoline failed - errno = %d",
125 errno);
126
127 if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
128 panic("outer trampoline didn't exit with SIGKILL, "
129 "status = %d", status);
130
131 return(arg.pid);
132}
133
/*
 * Body of the probe child started by start_ptraced_child().
 *
 * The child asks to be traced, stops itself, and then issues exactly one
 * getpid call which the tracing parent may rewrite.  It exits with:
 *   1 - the call returned our own pid (nothing was modified),
 *   0 - the call returned the parent's pid (what check_ptrace and
 *       check_sysemu expect when they manage to alter the call),
 *   2 - anything else, which the original author attributed to a bug in
 *       the host 2.6 SKAS3 patch before release -V6 combined with a bug
 *       in the UML code itself.
 *
 * arg: unused.
 */
static int ptrace_child(void *arg)
{
	int self = os_getpid(), parent = getppid();
	int seen, code;

	if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
		perror("ptrace");
		os_kill_process(self, 0);
	}
	os_stop_process(self);

	/* This syscall will be intercepted by the parent.  It must not be
	 * issued more than once.
	 */
	seen = os_getpid();

	code = (seen == self) ? 1 : ((seen == parent) ? 0 : 2);

	_exit(code);
}
162
163static int start_ptraced_child(void **stack_out)
164{
165 void *stack;
166 unsigned long sp;
167 int pid, n, status;
168
169 stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
170 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
171 if(stack == MAP_FAILED)
172 panic("check_ptrace : mmap failed, errno = %d", errno);
173 sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
174 pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL);
175 if(pid < 0)
176 panic("check_ptrace : clone failed, errno = %d", errno);
177 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
178 if(n < 0)
179 panic("check_ptrace : wait failed, errno = %d", errno);
180 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
181 panic("check_ptrace : expected SIGSTOP, got status = %d",
182 status);
183
184 *stack_out = stack;
185 return(pid);
186}
187
188/* When testing for SYSEMU support, if it is one of the broken versions, we must
189 * just avoid using sysemu, not panic, but only if SYSEMU features are broken.
190 * So only for SYSEMU features we test mustpanic, while normal host features
191 * must work anyway!*/
192static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic)
193{
194 int status, n, ret = 0;
195
196 if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
197 panic("check_ptrace : ptrace failed, errno = %d", errno);
198 CATCH_EINTR(n = waitpid(pid, &status, 0));
199 if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
200 int exit_with = WEXITSTATUS(status);
201 if (exit_with == 2)
202 printk("check_ptrace : child exited with status 2. "
203 "Serious trouble happening! Try updating your "
204 "host skas patch!\nDisabling SYSEMU support.");
205 printk("check_ptrace : child exited with exitcode %d, while "
206 "expecting %d; status 0x%x", exit_with,
207 exitcode, status);
208 if (mustpanic)
209 panic("\n");
210 else
211 printk("\n");
212 ret = -1;
213 }
214
215 if(munmap(stack, PAGE_SIZE) < 0)
216 panic("check_ptrace : munmap failed, errno = %d", errno);
217 return ret;
218}
219
220static int force_sysemu_disabled = 0;
221
222int ptrace_faultinfo = 1;
223int proc_mm = 1;
224
225static int __init skas0_cmd_param(char *str, int* add)
226{
227 ptrace_faultinfo = proc_mm = 0;
228 return 0;
229}
230
231static int __init nosysemu_cmd_param(char *str, int* add)
232{
233 force_sysemu_disabled = 1;
234 return 0;
235}
236
237__uml_setup("skas0", skas0_cmd_param,
238 "skas0\n"
239 " Disables SKAS3 usage, so that SKAS0 is used, unless you \n"
240 " specify mode=tt.\n\n");
241
242__uml_setup("nosysemu", nosysemu_cmd_param,
243 "nosysemu\n"
244 " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n"
245 " SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
246 " behaviour of ptrace() and helps reducing host context switch rate.\n"
247 " To make it working, you need a kernel patch for your host, too.\n"
248 " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n\n");
249
250static void __init check_sysemu(void)
251{
252 void *stack;
253 int pid, syscall, n, status, count=0;
254
255 printk("Checking syscall emulation patch for ptrace...");
256 sysemu_supported = 0;
257 pid = start_ptraced_child(&stack);
258
259 if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
260 goto fail;
261
262 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
263 if (n < 0)
264 panic("check_sysemu : wait failed, errno = %d", errno);
265 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
266 panic("check_sysemu : expected SIGTRAP, "
267 "got status = %d", status);
268
269 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
270 os_getpid());
271 if(n < 0)
272 panic("check_sysemu : failed to modify system "
273 "call return, errno = %d", errno);
274
275 if (stop_ptraced_child(pid, stack, 0, 0) < 0)
276 goto fail_stopped;
277
278 sysemu_supported = 1;
279 printk("OK\n");
280 set_using_sysemu(!force_sysemu_disabled);
281
282 printk("Checking advanced syscall emulation patch for ptrace...");
283 pid = start_ptraced_child(&stack);
284 while(1){
285 count++;
286 if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
287 goto fail;
288 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
289 if(n < 0)
290 panic("check_ptrace : wait failed, errno = %d", errno);
291 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
292 panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), "
293 "got status = %d", status);
294
295 syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
296 0);
297 if(syscall == __NR_getpid){
298 if (!count)
299 panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep");
300 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
301 os_getpid());
302 if(n < 0)
303 panic("check_sysemu : failed to modify system "
304 "call return, errno = %d", errno);
305 break;
306 }
307 }
308 if (stop_ptraced_child(pid, stack, 0, 0) < 0)
309 goto fail_stopped;
310
311 sysemu_supported = 2;
312 printk("OK\n");
313
314 if ( !force_sysemu_disabled )
315 set_using_sysemu(sysemu_supported);
316 return;
317
318fail:
319 stop_ptraced_child(pid, stack, 1, 0);
320fail_stopped:
321 printk("missing\n");
322}
323
324void __init check_ptrace(void)
325{
326 void *stack;
327 int pid, syscall, n, status;
328
329 printk("Checking that ptrace can change system call numbers...");
330 pid = start_ptraced_child(&stack);
331
332 if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
333 panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno);
334
335 while(1){
336 if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
337 panic("check_ptrace : ptrace failed, errno = %d",
338 errno);
339 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
340 if(n < 0)
341 panic("check_ptrace : wait failed, errno = %d", errno);
342 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80))
343 panic("check_ptrace : expected SIGTRAP + 0x80, "
344 "got status = %d", status);
345
346 syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
347 0);
348 if(syscall == __NR_getpid){
349 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
350 __NR_getppid);
351 if(n < 0)
352 panic("check_ptrace : failed to modify system "
353 "call, errno = %d", errno);
354 break;
355 }
356 }
357 stop_ptraced_child(pid, stack, 0, 1);
358 printk("OK\n");
359 check_sysemu();
360}
361
/*
 * Run fn(arg) with a jump buffer published through jmp_ptr.
 *
 * fn:      function to run; its return value is discarded.
 * arg:     argument handed to fn.
 * jmp_ptr: receives the address of a sigjmp_buf that lives on this
 *          function's stack, so it is only usable while fn is running.
 *
 * Returns 0 when fn returned normally, or the non-zero value delivered by
 * a jump to the published buffer.
 */
int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr)
{
	sigjmp_buf env;
	int jumped;

	*jmp_ptr = &env;
	jumped = sigsetjmp(env, 1);
	if(jumped == 0){
		(*fn)(arg);
		return(0);
	}

	return(jumped);
}
374
375void forward_pending_sigio(int target)
376{
377 sigset_t sigs;
378
379 if(sigpending(&sigs))
380 panic("forward_pending_sigio : sigpending failed");
381 if(sigismember(&sigs, SIGIO))
382 kill(target, SIGIO);
383}
384
385extern void *__syscall_stub_start, __syscall_stub_end;
386
387#ifdef UML_CONFIG_MODE_SKAS
388
389static inline void check_skas3_ptrace_support(void)
390{
391 struct ptrace_faultinfo fi;
392 void *stack;
393 int pid, n;
394
395 printf("Checking for the skas3 patch in the host...");
396 pid = start_ptraced_child(&stack);
397
398 n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
399 if (n < 0) {
400 ptrace_faultinfo = 0;
401 if(errno == EIO)
402 printf("not found\n");
403 else {
404 perror("not found");
405 }
406 }
407 else {
408 if (!ptrace_faultinfo)
409 printf("found but disabled on command line\n");
410 else
411 printf("found\n");
412 }
413
414 init_registers(pid);
415 stop_ptraced_child(pid, stack, 1, 1);
416}
417
418int can_do_skas(void)
419{
420 printf("Checking for /proc/mm...");
421 if (os_access("/proc/mm", OS_ACC_W_OK) < 0) {
422 proc_mm = 0;
423 printf("not found\n");
424 } else {
425 if (!proc_mm)
426 printf("found but disabled on command line\n");
427 else
428 printf("found\n");
429 }
430
431 check_skas3_ptrace_support();
432 return 1;
433}
434#else
/* Build without UML_CONFIG_MODE_SKAS: SKAS mode is never available. */
int can_do_skas(void)
{
	return 0;
}
439#endif
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index d296d55ade4b..db36c7c95940 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -4,7 +4,7 @@
4# 4#
5 5
6obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ 6obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \
7 syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ 7 syscall.o tlb.o trap_user.o uaccess.o
8 8
9subdir- := util 9subdir- := util
10 10
diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h
index 278b72f1d9ad..09536f81ee42 100644
--- a/arch/um/kernel/skas/include/mmu-skas.h
+++ b/arch/um/kernel/skas/include/mmu-skas.h
@@ -6,11 +6,15 @@
6#ifndef __SKAS_MMU_H 6#ifndef __SKAS_MMU_H
7#define __SKAS_MMU_H 7#define __SKAS_MMU_H
8 8
9#include "linux/config.h"
9#include "mm_id.h" 10#include "mm_id.h"
10 11
11struct mmu_context_skas { 12struct mmu_context_skas {
12 struct mm_id id; 13 struct mm_id id;
13 unsigned long last_page_table; 14 unsigned long last_page_table;
15#ifdef CONFIG_3_LEVEL_PGTABLES
16 unsigned long last_pmd;
17#endif
14}; 18};
15 19
16extern void switch_mm_skas(struct mm_id * mm_idp); 20extern void switch_mm_skas(struct mm_id * mm_idp);
diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h
index d983ea842547..060934740f9f 100644
--- a/arch/um/kernel/skas/include/skas.h
+++ b/arch/um/kernel/skas/include/skas.h
@@ -24,28 +24,26 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig));
24extern void remove_sigstack(void); 24extern void remove_sigstack(void);
25extern void new_thread_handler(int sig); 25extern void new_thread_handler(int sig);
26extern void handle_syscall(union uml_pt_regs *regs); 26extern void handle_syscall(union uml_pt_regs *regs);
27extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, 27extern int map(struct mm_id * mm_idp, unsigned long virt,
28 int r, int w, int x, int phys_fd, unsigned long long offset); 28 unsigned long len, int r, int w, int x, int phys_fd,
29extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); 29 unsigned long long offset, int done, void **data);
30extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len,
31 int done, void **data);
30extern int protect(struct mm_id * mm_idp, unsigned long addr, 32extern int protect(struct mm_id * mm_idp, unsigned long addr,
31 unsigned long len, int r, int w, int x); 33 unsigned long len, int r, int w, int x, int done,
34 void **data);
32extern void user_signal(int sig, union uml_pt_regs *regs, int pid); 35extern void user_signal(int sig, union uml_pt_regs *regs, int pid);
33extern int new_mm(int from); 36extern int new_mm(int from, unsigned long stack);
34extern int start_userspace(unsigned long stub_stack); 37extern int start_userspace(unsigned long stub_stack);
35extern int copy_context_skas0(unsigned long stack, int pid); 38extern int copy_context_skas0(unsigned long stack, int pid);
36extern void get_skas_faultinfo(int pid, struct faultinfo * fi); 39extern void get_skas_faultinfo(int pid, struct faultinfo * fi);
37extern long execute_syscall_skas(void *r); 40extern long execute_syscall_skas(void *r);
38extern unsigned long current_stub_stack(void); 41extern unsigned long current_stub_stack(void);
42extern long run_syscall_stub(struct mm_id * mm_idp,
43 int syscall, unsigned long *args, long expected,
44 void **addr, int done);
45extern long syscall_stub_data(struct mm_id * mm_idp,
46 unsigned long *data, int data_count,
47 void **addr, void **stub_addr);
39 48
40#endif 49#endif
41
42/*
43 * Overrides for Emacs so that we follow Linus's tabbing style.
44 * Emacs will notice this stuff at the end of the file and automatically
45 * adjust the settings for this buffer only. This must remain at the end
46 * of the file.
47 * ---------------------------------------------------------------------------
48 * Local variables:
49 * c-file-style: "linux"
50 * End:
51 */
diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c
index b0980ff3bd95..1d89640bd502 100644
--- a/arch/um/kernel/skas/mem_user.c
+++ b/arch/um/kernel/skas/mem_user.c
@@ -5,13 +5,14 @@
5 5
6#include <signal.h> 6#include <signal.h>
7#include <errno.h> 7#include <errno.h>
8#include <string.h>
8#include <sys/mman.h> 9#include <sys/mman.h>
9#include <sys/wait.h> 10#include <sys/wait.h>
10#include <asm/page.h> 11#include <asm/page.h>
11#include <asm/unistd.h> 12#include <asm/unistd.h>
12#include "mem_user.h" 13#include "mem_user.h"
13#include "mem.h" 14#include "mem.h"
14#include "mm_id.h" 15#include "skas.h"
15#include "user.h" 16#include "user.h"
16#include "os.h" 17#include "os.h"
17#include "proc_mm.h" 18#include "proc_mm.h"
@@ -23,46 +24,155 @@
23#include "uml-config.h" 24#include "uml-config.h"
24#include "sysdep/ptrace.h" 25#include "sysdep/ptrace.h"
25#include "sysdep/stub.h" 26#include "sysdep/stub.h"
26#include "skas.h"
27 27
28extern unsigned long syscall_stub, __syscall_stub_start; 28extern unsigned long batch_syscall_stub, __syscall_stub_start;
29 29
30extern void wait_stub_done(int pid, int sig, char * fname); 30extern void wait_stub_done(int pid, int sig, char * fname);
31 31
32static long run_syscall_stub(struct mm_id * mm_idp, int syscall, 32static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
33 unsigned long *args) 33 unsigned long *stack)
34{
35 if(stack == NULL){
36 stack = (unsigned long *) mm_idp->stack + 2;
37 *stack = 0;
38 }
39 return stack;
40}
41
42extern int proc_mm;
43
44int single_count = 0;
45int multi_count = 0;
46int multi_op_count = 0;
47
48static long do_syscall_stub(struct mm_id *mm_idp, void **addr)
34{ 49{
50 unsigned long regs[MAX_REG_NR];
51 unsigned long *data;
52 unsigned long *syscall;
53 long ret, offset;
35 int n, pid = mm_idp->u.pid; 54 int n, pid = mm_idp->u.pid;
36 unsigned long regs[MAX_REG_NR]; 55
56 if(proc_mm)
57#warning Need to look up userspace_pid by cpu
58 pid = userspace_pid[0];
59
60 multi_count++;
37 61
38 get_safe_registers(regs); 62 get_safe_registers(regs);
39 regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + 63 regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
40 ((unsigned long) &syscall_stub - 64 ((unsigned long) &batch_syscall_stub -
41 (unsigned long) &__syscall_stub_start); 65 (unsigned long) &__syscall_stub_start);
42 /* XXX Don't have a define for starting a syscall */ 66 n = ptrace_setregs(pid, regs);
43 regs[REGS_SYSCALL_NR] = syscall; 67 if(n < 0)
44 regs[REGS_SYSCALL_ARG1] = args[0]; 68 panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
45 regs[REGS_SYSCALL_ARG2] = args[1]; 69 n);
46 regs[REGS_SYSCALL_ARG3] = args[2]; 70
47 regs[REGS_SYSCALL_ARG4] = args[3]; 71 wait_stub_done(pid, 0, "do_syscall_stub");
48 regs[REGS_SYSCALL_ARG5] = args[4]; 72
49 regs[REGS_SYSCALL_ARG6] = args[5]; 73 /* When the stub stops, we find the following values on the
50 n = ptrace_setregs(pid, regs); 74 * beginning of the stack:
51 if(n < 0){ 75 * (long )return_value
52 printk("run_syscall_stub : PTRACE_SETREGS failed, " 76 * (long )offset to failed sycall-data (0, if no error)
53 "errno = %d\n", n); 77 */
54 return(n); 78 ret = *((unsigned long *) mm_idp->stack);
79 offset = *((unsigned long *) mm_idp->stack + 1);
80 if (offset) {
81 data = (unsigned long *)(mm_idp->stack +
82 offset - UML_CONFIG_STUB_DATA);
83 syscall = (unsigned long *)((unsigned long)data + data[0]);
84 printk("do_syscall_stub: syscall %ld failed, return value = "
85 "0x%lx, expected return value = 0x%lx\n",
86 syscall[0], ret, syscall[7]);
87 printk(" syscall parameters: "
88 "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
89 syscall[1], syscall[2], syscall[3],
90 syscall[4], syscall[5], syscall[6]);
91 for(n = 1; n < data[0]/sizeof(long); n++) {
92 if(n == 1)
93 printk(" additional syscall data:");
94 if(n % 4 == 1)
95 printk("\n ");
96 printk(" 0x%lx", data[n]);
97 }
98 if(n > 1)
99 printk("\n");
100 }
101 else ret = 0;
102
103 *addr = check_init_stack(mm_idp, NULL);
104
105 return ret;
106}
107
108long run_syscall_stub(struct mm_id * mm_idp, int syscall,
109 unsigned long *args, long expected, void **addr,
110 int done)
111{
112 unsigned long *stack = check_init_stack(mm_idp, *addr);
113
114 if(done && *addr == NULL)
115 single_count++;
116
117 *stack += sizeof(long);
118 stack += *stack / sizeof(long);
119
120 *stack++ = syscall;
121 *stack++ = args[0];
122 *stack++ = args[1];
123 *stack++ = args[2];
124 *stack++ = args[3];
125 *stack++ = args[4];
126 *stack++ = args[5];
127 *stack++ = expected;
128 *stack = 0;
129 multi_op_count++;
130
131 if(!done && ((((unsigned long) stack) & ~PAGE_MASK) <
132 PAGE_SIZE - 10 * sizeof(long))){
133 *addr = stack;
134 return 0;
55 } 135 }
56 136
57 wait_stub_done(pid, 0, "run_syscall_stub"); 137 return do_syscall_stub(mm_idp, addr);
138}
139
140long syscall_stub_data(struct mm_id * mm_idp,
141 unsigned long *data, int data_count,
142 void **addr, void **stub_addr)
143{
144 unsigned long *stack;
145 int ret = 0;
58 146
59 return(*((unsigned long *) mm_idp->stack)); 147 /* If *addr still is uninitialized, it *must* contain NULL.
148 * Thus in this case do_syscall_stub correctly won't be called.
149 */
150 if((((unsigned long) *addr) & ~PAGE_MASK) >=
151 PAGE_SIZE - (10 + data_count) * sizeof(long)) {
152 ret = do_syscall_stub(mm_idp, addr);
153 /* in case of error, don't overwrite data on stack */
154 if(ret)
155 return ret;
156 }
157
158 stack = check_init_stack(mm_idp, *addr);
159 *addr = stack;
160
161 *stack = data_count * sizeof(long);
162
163 memcpy(stack + 1, data, data_count * sizeof(long));
164
165 *stub_addr = (void *)(((unsigned long)(stack + 1) & ~PAGE_MASK) +
166 UML_CONFIG_STUB_DATA);
167
168 return 0;
60} 169}
61 170
62int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, 171int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len,
63 int r, int w, int x, int phys_fd, unsigned long long offset) 172 int r, int w, int x, int phys_fd, unsigned long long offset,
173 int done, void **data)
64{ 174{
65 int prot, n; 175 int prot, ret;
66 176
67 prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | 177 prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
68 (x ? PROT_EXEC : 0); 178 (x ? PROT_EXEC : 0);
@@ -70,6 +180,7 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len,
70 if(proc_mm){ 180 if(proc_mm){
71 struct proc_mm_op map; 181 struct proc_mm_op map;
72 int fd = mm_idp->u.mm_fd; 182 int fd = mm_idp->u.mm_fd;
183
73 map = ((struct proc_mm_op) { .op = MM_MMAP, 184 map = ((struct proc_mm_op) { .op = MM_MMAP,
74 .u = 185 .u =
75 { .mmap = 186 { .mmap =
@@ -81,63 +192,61 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len,
81 .fd = phys_fd, 192 .fd = phys_fd,
82 .offset= offset 193 .offset= offset
83 } } } ); 194 } } } );
84 n = os_write_file(fd, &map, sizeof(map)); 195 ret = os_write_file(fd, &map, sizeof(map));
85 if(n != sizeof(map)) 196 if(ret != sizeof(map))
86 printk("map : /proc/mm map failed, err = %d\n", -n); 197 printk("map : /proc/mm map failed, err = %d\n", -ret);
198 else ret = 0;
87 } 199 }
88 else { 200 else {
89 long res;
90 unsigned long args[] = { virt, len, prot, 201 unsigned long args[] = { virt, len, prot,
91 MAP_SHARED | MAP_FIXED, phys_fd, 202 MAP_SHARED | MAP_FIXED, phys_fd,
92 MMAP_OFFSET(offset) }; 203 MMAP_OFFSET(offset) };
93 204
94 res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); 205 ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
95 if((void *) res == MAP_FAILED) 206 data, done);
96 printk("mmap stub failed, errno = %d\n", res);
97 } 207 }
98 208
99 return 0; 209 return ret;
100} 210}
101 211
102int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) 212int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done,
213 void **data)
103{ 214{
104 int n; 215 int ret;
105 216
106 if(proc_mm){ 217 if(proc_mm){
107 struct proc_mm_op unmap; 218 struct proc_mm_op unmap;
108 int fd = mm_idp->u.mm_fd; 219 int fd = mm_idp->u.mm_fd;
220
109 unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, 221 unmap = ((struct proc_mm_op) { .op = MM_MUNMAP,
110 .u = 222 .u =
111 { .munmap = 223 { .munmap =
112 { .addr = 224 { .addr =
113 (unsigned long) addr, 225 (unsigned long) addr,
114 .len = len } } } ); 226 .len = len } } } );
115 n = os_write_file(fd, &unmap, sizeof(unmap)); 227 ret = os_write_file(fd, &unmap, sizeof(unmap));
116 if(n != sizeof(unmap)) { 228 if(ret != sizeof(unmap))
117 if(n < 0) 229 printk("unmap - proc_mm write returned %d\n", ret);
118 return(n); 230 else ret = 0;
119 else if(n > 0)
120 return(-EIO);
121 }
122 } 231 }
123 else { 232 else {
124 int res;
125 unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, 233 unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
126 0 }; 234 0 };
127 235
128 res = run_syscall_stub(mm_idp, __NR_munmap, args); 236 ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
129 if(res < 0) 237 data, done);
130 printk("munmap stub failed, errno = %d\n", res); 238 if(ret < 0)
239 printk("munmap stub failed, errno = %d\n", ret);
131 } 240 }
132 241
133 return(0); 242 return ret;
134} 243}
135 244
136int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, 245int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
137 int r, int w, int x) 246 int r, int w, int x, int done, void **data)
138{ 247{
139 struct proc_mm_op protect; 248 struct proc_mm_op protect;
140 int prot, n; 249 int prot, ret;
141 250
142 prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | 251 prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
143 (x ? PROT_EXEC : 0); 252 (x ? PROT_EXEC : 0);
@@ -152,20 +261,19 @@ int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
152 .len = len, 261 .len = len,
153 .prot = prot } } } ); 262 .prot = prot } } } );
154 263
155 n = os_write_file(fd, &protect, sizeof(protect)); 264 ret = os_write_file(fd, &protect, sizeof(protect));
156 if(n != sizeof(protect)) 265 if(ret != sizeof(protect))
157 panic("protect failed, err = %d", -n); 266 printk("protect failed, err = %d", -ret);
267 else ret = 0;
158 } 268 }
159 else { 269 else {
160 int res;
161 unsigned long args[] = { addr, len, prot, 0, 0, 0 }; 270 unsigned long args[] = { addr, len, prot, 0, 0, 0 };
162 271
163 res = run_syscall_stub(mm_idp, __NR_mprotect, args); 272 ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
164 if(res < 0) 273 data, done);
165 panic("mprotect stub failed, errno = %d\n", res);
166 } 274 }
167 275
168 return(0); 276 return ret;
169} 277}
170 278
171void before_mem_skas(unsigned long unused) 279void before_mem_skas(unsigned long unused)
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index d232daa42c31..240143b616a2 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -56,6 +56,9 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
56 */ 56 */
57 57
58 mm->context.skas.last_page_table = pmd_page_kernel(*pmd); 58 mm->context.skas.last_page_table = pmd_page_kernel(*pmd);
59#ifdef CONFIG_3_LEVEL_PGTABLES
60 mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
61#endif
59 62
60 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); 63 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
61 *pte = pte_mkexec(*pte); 64 *pte = pte_mkexec(*pte);
@@ -77,23 +80,14 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
77 struct mm_struct *cur_mm = current->mm; 80 struct mm_struct *cur_mm = current->mm;
78 struct mm_id *cur_mm_id = &cur_mm->context.skas.id; 81 struct mm_id *cur_mm_id = &cur_mm->context.skas.id;
79 struct mm_id *mm_id = &mm->context.skas.id; 82 struct mm_id *mm_id = &mm->context.skas.id;
80 unsigned long stack; 83 unsigned long stack = 0;
81 int from, ret; 84 int from, ret = -ENOMEM;
82 85
83 if(proc_mm){ 86 if(!proc_mm || !ptrace_faultinfo){
84 if((cur_mm != NULL) && (cur_mm != &init_mm)) 87 stack = get_zeroed_page(GFP_KERNEL);
85 from = cur_mm->context.skas.id.u.mm_fd; 88 if(stack == 0)
86 else from = -1; 89 goto out;
87 90
88 ret = new_mm(from);
89 if(ret < 0){
90 printk("init_new_context_skas - new_mm failed, "
91 "errno = %d\n", ret);
92 return ret;
93 }
94 mm_id->u.mm_fd = ret;
95 }
96 else {
97 /* This zeros the entry that pgd_alloc didn't, needed since 91 /* This zeros the entry that pgd_alloc didn't, needed since
98 * we are about to reinitialize it, and want mm.nr_ptes to 92 * we are about to reinitialize it, and want mm.nr_ptes to
99 * be accurate. 93 * be accurate.
@@ -103,20 +97,30 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
103 ret = init_stub_pte(mm, CONFIG_STUB_CODE, 97 ret = init_stub_pte(mm, CONFIG_STUB_CODE,
104 (unsigned long) &__syscall_stub_start); 98 (unsigned long) &__syscall_stub_start);
105 if(ret) 99 if(ret)
106 goto out; 100 goto out_free;
107
108 ret = -ENOMEM;
109 stack = get_zeroed_page(GFP_KERNEL);
110 if(stack == 0)
111 goto out;
112 mm_id->stack = stack;
113 101
114 ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); 102 ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack);
115 if(ret) 103 if(ret)
116 goto out_free; 104 goto out_free;
117 105
118 mm->nr_ptes--; 106 mm->nr_ptes--;
107 }
108 mm_id->stack = stack;
119 109
110 if(proc_mm){
111 if((cur_mm != NULL) && (cur_mm != &init_mm))
112 from = cur_mm_id->u.mm_fd;
113 else from = -1;
114
115 ret = new_mm(from, stack);
116 if(ret < 0){
117 printk("init_new_context_skas - new_mm failed, "
118 "errno = %d\n", ret);
119 goto out_free;
120 }
121 mm_id->u.mm_fd = ret;
122 }
123 else {
120 if((cur_mm != NULL) && (cur_mm != &init_mm)) 124 if((cur_mm != NULL) && (cur_mm != &init_mm))
121 mm_id->u.pid = copy_context_skas0(stack, 125 mm_id->u.pid = copy_context_skas0(stack,
122 cur_mm_id->u.pid); 126 cur_mm_id->u.pid);
@@ -126,7 +130,8 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
126 return 0; 130 return 0;
127 131
128 out_free: 132 out_free:
129 free_page(mm_id->stack); 133 if(mm_id->stack != 0)
134 free_page(mm_id->stack);
130 out: 135 out:
131 return ret; 136 return ret;
132} 137}
@@ -137,9 +142,15 @@ void destroy_context_skas(struct mm_struct *mm)
137 142
138 if(proc_mm) 143 if(proc_mm)
139 os_close_file(mmu->id.u.mm_fd); 144 os_close_file(mmu->id.u.mm_fd);
140 else { 145 else
141 os_kill_ptraced_process(mmu->id.u.pid, 1); 146 os_kill_ptraced_process(mmu->id.u.pid, 1);
147
148 if(!proc_mm || !ptrace_faultinfo){
142 free_page(mmu->id.stack); 149 free_page(mmu->id.stack);
143 free_page(mmu->last_page_table); 150 pte_free_kernel((pte_t *) mmu->last_page_table);
151 dec_page_state(nr_page_table_pages);
152#ifdef CONFIG_3_LEVEL_PGTABLES
153 pmd_free((pmd_t *) mmu->last_pmd);
154#endif
144 } 155 }
145} 156}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index f228f8b54194..5cd0e9929789 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -138,6 +138,8 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu
138} 138}
139 139
140extern int __syscall_stub_start; 140extern int __syscall_stub_start;
141int stub_code_fd = -1;
142__u64 stub_code_offset;
141 143
142static int userspace_tramp(void *stack) 144static int userspace_tramp(void *stack)
143{ 145{
@@ -152,31 +154,31 @@ static int userspace_tramp(void *stack)
152 /* This has a pte, but it can't be mapped in with the usual 154 /* This has a pte, but it can't be mapped in with the usual
153 * tlb_flush mechanism because this is part of that mechanism 155 * tlb_flush mechanism because this is part of that mechanism
154 */ 156 */
155 int fd;
156 __u64 offset;
157
158 fd = phys_mapping(to_phys(&__syscall_stub_start), &offset);
159 addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), 157 addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(),
160 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); 158 PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
159 stub_code_fd, stub_code_offset);
161 if(addr == MAP_FAILED){ 160 if(addr == MAP_FAILED){
162 printk("mapping mmap stub failed, errno = %d\n", 161 printk("mapping stub code failed, errno = %d\n",
163 errno); 162 errno);
164 exit(1); 163 exit(1);
165 } 164 }
166 165
167 if(stack != NULL){ 166 if(stack != NULL){
167 int fd;
168 __u64 offset;
169
168 fd = phys_mapping(to_phys(stack), &offset); 170 fd = phys_mapping(to_phys(stack), &offset);
169 addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), 171 addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(),
170 PROT_READ | PROT_WRITE, 172 PROT_READ | PROT_WRITE,
171 MAP_FIXED | MAP_SHARED, fd, offset); 173 MAP_FIXED | MAP_SHARED, fd, offset);
172 if(addr == MAP_FAILED){ 174 if(addr == MAP_FAILED){
173 printk("mapping segfault stack failed, " 175 printk("mapping stub stack failed, "
174 "errno = %d\n", errno); 176 "errno = %d\n", errno);
175 exit(1); 177 exit(1);
176 } 178 }
177 } 179 }
178 } 180 }
179 if(!ptrace_faultinfo && (stack != NULL)){ 181 if(!ptrace_faultinfo){
180 unsigned long v = UML_CONFIG_STUB_CODE + 182 unsigned long v = UML_CONFIG_STUB_CODE +
181 (unsigned long) stub_segv_handler - 183 (unsigned long) stub_segv_handler -
182 (unsigned long) &__syscall_stub_start; 184 (unsigned long) &__syscall_stub_start;
@@ -202,6 +204,10 @@ int start_userspace(unsigned long stub_stack)
202 unsigned long sp; 204 unsigned long sp;
203 int pid, status, n, flags; 205 int pid, status, n, flags;
204 206
207 if ( stub_code_fd == -1 )
208 stub_code_fd = phys_mapping(to_phys(&__syscall_stub_start),
209 &stub_code_offset);
210
205 stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, 211 stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
206 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 212 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
207 if(stack == MAP_FAILED) 213 if(stack == MAP_FAILED)
@@ -363,6 +369,53 @@ int copy_context_skas0(unsigned long new_stack, int pid)
363 return pid; 369 return pid;
364} 370}
365 371
372/*
373 * This is used only if proc_mm is available while PTRACE_FAULTINFO
374 * isn't. Opening /proc/mm creates a new mm_context, which lacks the stub pages.
375 * Thus, we map them using the /proc/mm fd.
376 */
377void map_stub_pages(int fd, unsigned long code,
378 unsigned long data, unsigned long stack)
379{
380 struct proc_mm_op mmop;
381 int n;
382
383 mmop = ((struct proc_mm_op) { .op = MM_MMAP,
384 .u =
385 { .mmap =
386 { .addr = code,
387 .len = PAGE_SIZE,
388 .prot = PROT_EXEC,
389 .flags = MAP_FIXED | MAP_PRIVATE,
390 .fd = stub_code_fd,
391 .offset = stub_code_offset
392 } } });
393 n = os_write_file(fd, &mmop, sizeof(mmop));
394 if(n != sizeof(mmop))
395 panic("map_stub_pages : /proc/mm map for code failed, "
396 "err = %d\n", -n);
397
398 if ( stack ) {
399 __u64 map_offset;
400 int map_fd = phys_mapping(to_phys((void *)stack), &map_offset);
401 mmop = ((struct proc_mm_op)
402 { .op = MM_MMAP,
403 .u =
404 { .mmap =
405 { .addr = data,
406 .len = PAGE_SIZE,
407 .prot = PROT_READ | PROT_WRITE,
408 .flags = MAP_FIXED | MAP_SHARED,
409 .fd = map_fd,
410 .offset = map_offset
411 } } });
412 n = os_write_file(fd, &mmop, sizeof(mmop));
413 if(n != sizeof(mmop))
414 panic("map_stub_pages : /proc/mm map for data failed, "
415 "err = %d\n", -n);
416 }
417}
418
366void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, 419void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr,
367 void (*handler)(int)) 420 void (*handler)(int))
368{ 421{
diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
index cbabab104ac3..3d1b227226e6 100644
--- a/arch/um/kernel/skas/process_kern.c
+++ b/arch/um/kernel/skas/process_kern.c
@@ -129,7 +129,9 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp,
129 return(0); 129 return(0);
130} 130}
131 131
132int new_mm(int from) 132extern void map_stub_pages(int fd, unsigned long code,
133 unsigned long data, unsigned long stack);
134int new_mm(int from, unsigned long stack)
133{ 135{
134 struct proc_mm_op copy; 136 struct proc_mm_op copy;
135 int n, fd; 137 int n, fd;
@@ -148,6 +150,9 @@ int new_mm(int from)
148 "err = %d\n", -n); 150 "err = %d\n", -n);
149 } 151 }
150 152
153 if(!ptrace_faultinfo)
154 map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack);
155
151 return(fd); 156 return(fd);
152} 157}
153 158
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
new file mode 100644
index 000000000000..51fb94076fcf
--- /dev/null
+++ b/arch/um/kernel/skas/syscall.c
@@ -0,0 +1,50 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include "linux/sys.h"
7#include "linux/ptrace.h"
8#include "asm/errno.h"
9#include "asm/unistd.h"
10#include "asm/ptrace.h"
11#include "asm/current.h"
12#include "sysdep/syscalls.h"
13#include "kern_util.h"
14#include "syscall.h"
15
16void handle_syscall(union uml_pt_regs *r)
17{
18 struct pt_regs *regs = container_of(r, struct pt_regs, regs);
19 long result;
20 int syscall;
21#ifdef UML_CONFIG_SYSCALL_DEBUG
22 int index;
23
24 index = record_syscall_start(UPT_SYSCALL_NR(r));
25#endif
26 syscall_trace(r, 0);
27
28 current->thread.nsyscalls++;
29 nsyscalls++;
30
31 /* This should go in the declaration of syscall, but when I do that,
32 * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
33 * children at all, sometimes hanging when bash doesn't see the first
34 * ls exit.
35 * The assembly looks functionally the same to me. This is
36 * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5)
37 * in case it's a compiler bug.
38 */
39 syscall = UPT_SYSCALL_NR(r);
40 if((syscall >= NR_syscalls) || (syscall < 0))
41 result = -ENOSYS;
42 else result = EXECUTE_SYSCALL(syscall, regs);
43
44 REGS_SET_SYSCALL_RETURN(r->skas.regs, result);
45
46 syscall_trace(r, 1);
47#ifdef UML_CONFIG_SYSCALL_DEBUG
48 record_syscall_end(index, result);
49#endif
50}
diff --git a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c
deleted file mode 100644
index bdf040ce5b8e..000000000000
--- a/arch/um/kernel/skas/syscall_kern.c
+++ /dev/null
@@ -1,43 +0,0 @@
1/*
2 * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include "linux/sys.h"
7#include "linux/ptrace.h"
8#include "asm/errno.h"
9#include "asm/unistd.h"
10#include "asm/ptrace.h"
11#include "asm/current.h"
12#include "sysdep/syscalls.h"
13#include "kern_util.h"
14
15extern syscall_handler_t *sys_call_table[];
16
17long execute_syscall_skas(void *r)
18{
19 struct pt_regs *regs = r;
20 long res;
21 int syscall;
22
23 current->thread.nsyscalls++;
24 nsyscalls++;
25 syscall = UPT_SYSCALL_NR(&regs->regs);
26
27 if((syscall >= NR_syscalls) || (syscall < 0))
28 res = -ENOSYS;
29 else res = EXECUTE_SYSCALL(syscall, regs);
30
31 return(res);
32}
33
34/*
35 * Overrides for Emacs so that we follow Linus's tabbing style.
36 * Emacs will notice this stuff at the end of the file and automatically
37 * adjust the settings for this buffer only. This must remain at the end
38 * of the file.
39 * ---------------------------------------------------------------------------
40 * Local variables:
41 * c-file-style: "linux"
42 * End:
43 */
diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c
deleted file mode 100644
index 6b0664970147..000000000000
--- a/arch/um/kernel/skas/syscall_user.c
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <signal.h>
8#include "kern_util.h"
9#include "uml-config.h"
10#include "syscall_user.h"
11#include "sysdep/ptrace.h"
12#include "sysdep/sigcontext.h"
13#include "skas.h"
14
15void handle_syscall(union uml_pt_regs *regs)
16{
17 long result;
18#ifdef UML_CONFIG_SYSCALL_DEBUG
19 int index;
20
21 index = record_syscall_start(UPT_SYSCALL_NR(regs));
22#endif
23
24 syscall_trace(regs, 0);
25 result = execute_syscall_skas(regs);
26
27 REGS_SET_SYSCALL_RETURN(regs->skas.regs, result);
28
29 syscall_trace(regs, 1);
30#ifdef UML_CONFIG_SYSCALL_DEBUG
31 record_syscall_end(index, result);
32#endif
33}
34
35/*
36 * Overrides for Emacs so that we follow Linus's tabbing style.
37 * Emacs will notice this stuff at the end of the file and automatically
38 * adjust the settings for this buffer only. This must remain at the end
39 * of the file.
40 * ---------------------------------------------------------------------------
41 * Local variables:
42 * c-file-style: "linux"
43 * End:
44 */
diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c
index 6230999c672c..6e84963dfc29 100644
--- a/arch/um/kernel/skas/tlb.c
+++ b/arch/um/kernel/skas/tlb.c
@@ -18,33 +18,39 @@
18#include "os.h" 18#include "os.h"
19#include "tlb.h" 19#include "tlb.h"
20 20
21static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) 21static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
22 int finished, void **flush)
22{ 23{
23 struct host_vm_op *op; 24 struct host_vm_op *op;
24 int i; 25 int i, ret = 0;
25 26
26 for(i = 0; i <= last; i++){ 27 for(i = 0; i <= last && !ret; i++){
27 op = &ops[i]; 28 op = &ops[i];
28 switch(op->type){ 29 switch(op->type){
29 case MMAP: 30 case MMAP:
30 map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, 31 ret = map(&mmu->skas.id, op->u.mmap.addr,
31 op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, 32 op->u.mmap.len, op->u.mmap.r, op->u.mmap.w,
32 op->u.mmap.fd, op->u.mmap.offset); 33 op->u.mmap.x, op->u.mmap.fd,
34 op->u.mmap.offset, finished, flush);
33 break; 35 break;
34 case MUNMAP: 36 case MUNMAP:
35 unmap(&mmu->skas.id, (void *) op->u.munmap.addr, 37 ret = unmap(&mmu->skas.id,
36 op->u.munmap.len); 38 (void *) op->u.munmap.addr,
39 op->u.munmap.len, finished, flush);
37 break; 40 break;
38 case MPROTECT: 41 case MPROTECT:
39 protect(&mmu->skas.id, op->u.mprotect.addr, 42 ret = protect(&mmu->skas.id, op->u.mprotect.addr,
40 op->u.mprotect.len, op->u.mprotect.r, 43 op->u.mprotect.len, op->u.mprotect.r,
41 op->u.mprotect.w, op->u.mprotect.x); 44 op->u.mprotect.w, op->u.mprotect.x,
45 finished, flush);
42 break; 46 break;
43 default: 47 default:
44 printk("Unknown op type %d in do_ops\n", op->type); 48 printk("Unknown op type %d in do_ops\n", op->type);
45 break; 49 break;
46 } 50 }
47 } 51 }
52
53 return ret;
48} 54}
49 55
50extern int proc_mm; 56extern int proc_mm;
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
new file mode 100644
index 000000000000..1429c131879d
--- /dev/null
+++ b/arch/um/kernel/syscall.c
@@ -0,0 +1,36 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include "kern_util.h"
7#include "syscall.h"
8#include "os.h"
9
10struct {
11 int syscall;
12 int pid;
13 long result;
14 unsigned long long start;
15 unsigned long long end;
16} syscall_record[1024];
17
18int record_syscall_start(int syscall)
19{
20 int max, index;
21
22 max = sizeof(syscall_record)/sizeof(syscall_record[0]);
23 index = next_syscall_index(max);
24
25 syscall_record[index].syscall = syscall;
26 syscall_record[index].pid = current_pid();
27 syscall_record[index].result = 0xdeadbeef;
28 syscall_record[index].start = os_usecs();
29 return(index);
30}
31
32void record_syscall_end(int index, long result)
33{
34 syscall_record[index].result = result;
35 syscall_record[index].end = os_usecs();
36}
diff --git a/arch/um/kernel/syscall_user.c b/arch/um/kernel/syscall_user.c
deleted file mode 100644
index 01b711e00a85..000000000000
--- a/arch/um/kernel/syscall_user.c
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <sys/time.h>
8#include "kern_util.h"
9#include "syscall_user.h"
10
11struct {
12 int syscall;
13 int pid;
14 long result;
15 struct timeval start;
16 struct timeval end;
17} syscall_record[1024];
18
19int record_syscall_start(int syscall)
20{
21 int max, index;
22
23 max = sizeof(syscall_record)/sizeof(syscall_record[0]);
24 index = next_syscall_index(max);
25
26 syscall_record[index].syscall = syscall;
27 syscall_record[index].pid = current_pid();
28 syscall_record[index].result = 0xdeadbeef;
29 gettimeofday(&syscall_record[index].start, NULL);
30 return(index);
31}
32
33void record_syscall_end(int index, long result)
34{
35 syscall_record[index].result = result;
36 gettimeofday(&syscall_record[index].end, NULL);
37}
38
39/*
40 * Overrides for Emacs so that we follow Linus's tabbing style.
41 * Emacs will notice this stuff at the end of the file and automatically
42 * adjust the settings for this buffer only. This must remain at the end
43 * of the file.
44 * ---------------------------------------------------------------------------
45 * Local variables:
46 * c-file-style: "linux"
47 * End:
48 */
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 83ec8d4747fd..80ed6188e8a2 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -15,12 +15,118 @@
15#include "mem_user.h" 15#include "mem_user.h"
16#include "os.h" 16#include "os.h"
17 17
18static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
19 int r, int w, int x, struct host_vm_op *ops, int *index,
20 int last_filled, union mm_context *mmu, void **flush,
21 int (*do_ops)(union mm_context *, struct host_vm_op *,
22 int, int, void **))
23{
24 __u64 offset;
25 struct host_vm_op *last;
26 int fd, ret = 0;
27
28 fd = phys_mapping(phys, &offset);
29 if(*index != -1){
30 last = &ops[*index];
31 if((last->type == MMAP) &&
32 (last->u.mmap.addr + last->u.mmap.len == virt) &&
33 (last->u.mmap.r == r) && (last->u.mmap.w == w) &&
34 (last->u.mmap.x == x) && (last->u.mmap.fd == fd) &&
35 (last->u.mmap.offset + last->u.mmap.len == offset)){
36 last->u.mmap.len += len;
37 return 0;
38 }
39 }
40
41 if(*index == last_filled){
42 ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
43 *index = -1;
44 }
45
46 ops[++*index] = ((struct host_vm_op) { .type = MMAP,
47 .u = { .mmap = {
48 .addr = virt,
49 .len = len,
50 .r = r,
51 .w = w,
52 .x = x,
53 .fd = fd,
54 .offset = offset }
55 } });
56 return ret;
57}
58
59static int add_munmap(unsigned long addr, unsigned long len,
60 struct host_vm_op *ops, int *index, int last_filled,
61 union mm_context *mmu, void **flush,
62 int (*do_ops)(union mm_context *, struct host_vm_op *,
63 int, int, void **))
64{
65 struct host_vm_op *last;
66 int ret = 0;
67
68 if(*index != -1){
69 last = &ops[*index];
70 if((last->type == MUNMAP) &&
71 (last->u.munmap.addr + last->u.mmap.len == addr)){
72 last->u.munmap.len += len;
73 return 0;
74 }
75 }
76
77 if(*index == last_filled){
78 ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
79 *index = -1;
80 }
81
82 ops[++*index] = ((struct host_vm_op) { .type = MUNMAP,
83 .u = { .munmap = {
84 .addr = addr,
85 .len = len } } });
86 return ret;
87}
88
89static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
90 int x, struct host_vm_op *ops, int *index,
91 int last_filled, union mm_context *mmu, void **flush,
92 int (*do_ops)(union mm_context *, struct host_vm_op *,
93 int, int, void **))
94{
95 struct host_vm_op *last;
96 int ret = 0;
97
98 if(*index != -1){
99 last = &ops[*index];
100 if((last->type == MPROTECT) &&
101 (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
102 (last->u.mprotect.r == r) && (last->u.mprotect.w == w) &&
103 (last->u.mprotect.x == x)){
104 last->u.mprotect.len += len;
105 return 0;
106 }
107 }
108
109 if(*index == last_filled){
110 ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
111 *index = -1;
112 }
113
114 ops[++*index] = ((struct host_vm_op) { .type = MPROTECT,
115 .u = { .mprotect = {
116 .addr = addr,
117 .len = len,
118 .r = r,
119 .w = w,
120 .x = x } } });
121 return ret;
122}
123
18#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) 124#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
19 125
20void fix_range_common(struct mm_struct *mm, unsigned long start_addr, 126void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
21 unsigned long end_addr, int force, 127 unsigned long end_addr, int force,
22 void (*do_ops)(union mm_context *, struct host_vm_op *, 128 int (*do_ops)(union mm_context *, struct host_vm_op *,
23 int)) 129 int, int, void **))
24{ 130{
25 pgd_t *npgd; 131 pgd_t *npgd;
26 pud_t *npud; 132 pud_t *npud;
@@ -29,21 +135,24 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
29 union mm_context *mmu = &mm->context; 135 union mm_context *mmu = &mm->context;
30 unsigned long addr, end; 136 unsigned long addr, end;
31 int r, w, x; 137 int r, w, x;
32 struct host_vm_op ops[16]; 138 struct host_vm_op ops[1];
139 void *flush = NULL;
33 int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; 140 int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1;
141 int ret = 0;
34 142
35 if(mm == NULL) return; 143 if(mm == NULL) return;
36 144
37 for(addr = start_addr; addr < end_addr;){ 145 ops[0].type = NONE;
146 for(addr = start_addr; addr < end_addr && !ret;){
38 npgd = pgd_offset(mm, addr); 147 npgd = pgd_offset(mm, addr);
39 if(!pgd_present(*npgd)){ 148 if(!pgd_present(*npgd)){
40 end = ADD_ROUND(addr, PGDIR_SIZE); 149 end = ADD_ROUND(addr, PGDIR_SIZE);
41 if(end > end_addr) 150 if(end > end_addr)
42 end = end_addr; 151 end = end_addr;
43 if(force || pgd_newpage(*npgd)){ 152 if(force || pgd_newpage(*npgd)){
44 op_index = add_munmap(addr, end - addr, ops, 153 ret = add_munmap(addr, end - addr, ops,
45 op_index, last_op, mmu, 154 &op_index, last_op, mmu,
46 do_ops); 155 &flush, do_ops);
47 pgd_mkuptodate(*npgd); 156 pgd_mkuptodate(*npgd);
48 } 157 }
49 addr = end; 158 addr = end;
@@ -56,9 +165,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
56 if(end > end_addr) 165 if(end > end_addr)
57 end = end_addr; 166 end = end_addr;
58 if(force || pud_newpage(*npud)){ 167 if(force || pud_newpage(*npud)){
59 op_index = add_munmap(addr, end - addr, ops, 168 ret = add_munmap(addr, end - addr, ops,
60 op_index, last_op, mmu, 169 &op_index, last_op, mmu,
61 do_ops); 170 &flush, do_ops);
62 pud_mkuptodate(*npud); 171 pud_mkuptodate(*npud);
63 } 172 }
64 addr = end; 173 addr = end;
@@ -71,9 +180,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
71 if(end > end_addr) 180 if(end > end_addr)
72 end = end_addr; 181 end = end_addr;
73 if(force || pmd_newpage(*npmd)){ 182 if(force || pmd_newpage(*npmd)){
74 op_index = add_munmap(addr, end - addr, ops, 183 ret = add_munmap(addr, end - addr, ops,
75 op_index, last_op, mmu, 184 &op_index, last_op, mmu,
76 do_ops); 185 &flush, do_ops);
77 pmd_mkuptodate(*npmd); 186 pmd_mkuptodate(*npmd);
78 } 187 }
79 addr = end; 188 addr = end;
@@ -92,24 +201,32 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
92 } 201 }
93 if(force || pte_newpage(*npte)){ 202 if(force || pte_newpage(*npte)){
94 if(pte_present(*npte)) 203 if(pte_present(*npte))
95 op_index = add_mmap(addr, 204 ret = add_mmap(addr,
96 pte_val(*npte) & PAGE_MASK, 205 pte_val(*npte) & PAGE_MASK,
97 PAGE_SIZE, r, w, x, ops, 206 PAGE_SIZE, r, w, x, ops,
98 op_index, last_op, mmu, 207 &op_index, last_op, mmu,
99 do_ops); 208 &flush, do_ops);
100 else op_index = add_munmap(addr, PAGE_SIZE, ops, 209 else ret = add_munmap(addr, PAGE_SIZE, ops,
101 op_index, last_op, mmu, 210 &op_index, last_op, mmu,
102 do_ops); 211 &flush, do_ops);
103 } 212 }
104 else if(pte_newprot(*npte)) 213 else if(pte_newprot(*npte))
105 op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, 214 ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops,
106 op_index, last_op, mmu, 215 &op_index, last_op, mmu,
107 do_ops); 216 &flush, do_ops);
108 217
109 *npte = pte_mkuptodate(*npte); 218 *npte = pte_mkuptodate(*npte);
110 addr += PAGE_SIZE; 219 addr += PAGE_SIZE;
111 } 220 }
112 (*do_ops)(mmu, ops, op_index); 221
222 if(!ret)
223 ret = (*do_ops)(mmu, ops, op_index, 1, &flush);
224
225 /* This is not an else because ret is modified above */
226 if(ret) {
227 printk("fix_range_common: failed, killing current process\n");
228 force_sig(SIGKILL, current);
229 }
113} 230}
114 231
115int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 232int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
@@ -226,106 +343,6 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr)
226 return(pte_offset_map(pmd, addr)); 343 return(pte_offset_map(pmd, addr));
227} 344}
228 345
229int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
230 int r, int w, int x, struct host_vm_op *ops, int index,
231 int last_filled, union mm_context *mmu,
232 void (*do_ops)(union mm_context *, struct host_vm_op *, int))
233{
234 __u64 offset;
235 struct host_vm_op *last;
236 int fd;
237
238 fd = phys_mapping(phys, &offset);
239 if(index != -1){
240 last = &ops[index];
241 if((last->type == MMAP) &&
242 (last->u.mmap.addr + last->u.mmap.len == virt) &&
243 (last->u.mmap.r == r) && (last->u.mmap.w == w) &&
244 (last->u.mmap.x == x) && (last->u.mmap.fd == fd) &&
245 (last->u.mmap.offset + last->u.mmap.len == offset)){
246 last->u.mmap.len += len;
247 return(index);
248 }
249 }
250
251 if(index == last_filled){
252 (*do_ops)(mmu, ops, last_filled);
253 index = -1;
254 }
255
256 ops[++index] = ((struct host_vm_op) { .type = MMAP,
257 .u = { .mmap = {
258 .addr = virt,
259 .len = len,
260 .r = r,
261 .w = w,
262 .x = x,
263 .fd = fd,
264 .offset = offset }
265 } });
266 return(index);
267}
268
269int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops,
270 int index, int last_filled, union mm_context *mmu,
271 void (*do_ops)(union mm_context *, struct host_vm_op *, int))
272{
273 struct host_vm_op *last;
274
275 if(index != -1){
276 last = &ops[index];
277 if((last->type == MUNMAP) &&
278 (last->u.munmap.addr + last->u.mmap.len == addr)){
279 last->u.munmap.len += len;
280 return(index);
281 }
282 }
283
284 if(index == last_filled){
285 (*do_ops)(mmu, ops, last_filled);
286 index = -1;
287 }
288
289 ops[++index] = ((struct host_vm_op) { .type = MUNMAP,
290 .u = { .munmap = {
291 .addr = addr,
292 .len = len } } });
293 return(index);
294}
295
296int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x,
297 struct host_vm_op *ops, int index, int last_filled,
298 union mm_context *mmu,
299 void (*do_ops)(union mm_context *, struct host_vm_op *, int))
300{
301 struct host_vm_op *last;
302
303 if(index != -1){
304 last = &ops[index];
305 if((last->type == MPROTECT) &&
306 (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
307 (last->u.mprotect.r == r) && (last->u.mprotect.w == w) &&
308 (last->u.mprotect.x == x)){
309 last->u.mprotect.len += len;
310 return(index);
311 }
312 }
313
314 if(index == last_filled){
315 (*do_ops)(mmu, ops, last_filled);
316 index = -1;
317 }
318
319 ops[++index] = ((struct host_vm_op) { .type = MPROTECT,
320 .u = { .mprotect = {
321 .addr = addr,
322 .len = len,
323 .r = r,
324 .w = w,
325 .x = x } } });
326 return(index);
327}
328
329void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) 346void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
330{ 347{
331 address &= PAGE_MASK; 348 address &= PAGE_MASK;
diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
index c20aef120598..b5fc89fe9eab 100644
--- a/arch/um/kernel/trap_kern.c
+++ b/arch/um/kernel/trap_kern.c
@@ -26,6 +26,7 @@
26#include "mem.h" 26#include "mem.h"
27#include "mem_kern.h" 27#include "mem_kern.h"
28 28
29/* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by segv(). */
29int handle_page_fault(unsigned long address, unsigned long ip, 30int handle_page_fault(unsigned long address, unsigned long ip,
30 int is_write, int is_user, int *code_out) 31 int is_write, int is_user, int *code_out)
31{ 32{
@@ -35,7 +36,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
35 pud_t *pud; 36 pud_t *pud;
36 pmd_t *pmd; 37 pmd_t *pmd;
37 pte_t *pte; 38 pte_t *pte;
38 unsigned long page;
39 int err = -EFAULT; 39 int err = -EFAULT;
40 40
41 *code_out = SEGV_MAPERR; 41 *code_out = SEGV_MAPERR;
@@ -52,7 +52,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
52 else if(expand_stack(vma, address)) 52 else if(expand_stack(vma, address))
53 goto out; 53 goto out;
54 54
55 good_area: 55good_area:
56 *code_out = SEGV_ACCERR; 56 *code_out = SEGV_ACCERR;
57 if(is_write && !(vma->vm_flags & VM_WRITE)) 57 if(is_write && !(vma->vm_flags & VM_WRITE))
58 goto out; 58 goto out;
@@ -60,9 +60,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
60 if(!(vma->vm_flags & (VM_READ | VM_EXEC))) 60 if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
61 goto out; 61 goto out;
62 62
63 page = address & PAGE_MASK;
64 do { 63 do {
65 survive: 64survive:
66 switch (handle_mm_fault(mm, vma, address, is_write)){ 65 switch (handle_mm_fault(mm, vma, address, is_write)){
67 case VM_FAULT_MINOR: 66 case VM_FAULT_MINOR:
68 current->min_flt++; 67 current->min_flt++;
@@ -79,16 +78,16 @@ int handle_page_fault(unsigned long address, unsigned long ip,
79 default: 78 default:
80 BUG(); 79 BUG();
81 } 80 }
82 pgd = pgd_offset(mm, page); 81 pgd = pgd_offset(mm, address);
83 pud = pud_offset(pgd, page); 82 pud = pud_offset(pgd, address);
84 pmd = pmd_offset(pud, page); 83 pmd = pmd_offset(pud, address);
85 pte = pte_offset_kernel(pmd, page); 84 pte = pte_offset_kernel(pmd, address);
86 } while(!pte_present(*pte)); 85 } while(!pte_present(*pte));
87 err = 0; 86 err = 0;
88 *pte = pte_mkyoung(*pte); 87 *pte = pte_mkyoung(*pte);
89 if(pte_write(*pte)) *pte = pte_mkdirty(*pte); 88 if(pte_write(*pte)) *pte = pte_mkdirty(*pte);
90 flush_tlb_page(vma, page); 89 flush_tlb_page(vma, address);
91 out: 90out:
92 up_read(&mm->mmap_sem); 91 up_read(&mm->mmap_sem);
93 return(err); 92 return(err);
94 93
@@ -144,19 +143,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc)
144 panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", 143 panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
145 address, ip); 144 address, ip);
146 145
147 if(err == -EACCES){ 146 if (err == -EACCES) {
148 si.si_signo = SIGBUS; 147 si.si_signo = SIGBUS;
149 si.si_errno = 0; 148 si.si_errno = 0;
150 si.si_code = BUS_ADRERR; 149 si.si_code = BUS_ADRERR;
151 si.si_addr = (void *)address; 150 si.si_addr = (void *)address;
152 current->thread.arch.faultinfo = fi; 151 current->thread.arch.faultinfo = fi;
153 force_sig_info(SIGBUS, &si, current); 152 force_sig_info(SIGBUS, &si, current);
154 } 153 } else if (err == -ENOMEM) {
155 else if(err == -ENOMEM){
156 printk("VM: killing process %s\n", current->comm); 154 printk("VM: killing process %s\n", current->comm);
157 do_exit(SIGKILL); 155 do_exit(SIGKILL);
158 } 156 } else {
159 else { 157 BUG_ON(err != -EFAULT);
160 si.si_signo = SIGSEGV; 158 si.si_signo = SIGSEGV;
161 si.si_addr = (void *) address; 159 si.si_addr = (void *) address;
162 current->thread.arch.faultinfo = fi; 160 current->thread.arch.faultinfo = fi;
@@ -200,30 +198,3 @@ void winch(int sig, union uml_pt_regs *regs)
200void trap_init(void) 198void trap_init(void)
201{ 199{
202} 200}
203
204DEFINE_SPINLOCK(trap_lock);
205
206static int trap_index = 0;
207
208int next_trap_index(int limit)
209{
210 int ret;
211
212 spin_lock(&trap_lock);
213 ret = trap_index;
214 if(++trap_index == limit)
215 trap_index = 0;
216 spin_unlock(&trap_lock);
217 return(ret);
218}
219
220/*
221 * Overrides for Emacs so that we follow Linus's tabbing style.
222 * Emacs will notice this stuff at the end of the file and automatically
223 * adjust the settings for this buffer only. This must remain at the end
224 * of the file.
225 * ---------------------------------------------------------------------------
226 * Local variables:
227 * c-file-style: "linux"
228 * End:
229 */
diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
index f825a6eda3f5..e9ccd6b8d3c7 100644
--- a/arch/um/kernel/trap_user.c
+++ b/arch/um/kernel/trap_user.c
@@ -40,35 +40,14 @@ void kill_child_dead(int pid)
40 } while(1); 40 } while(1);
41} 41}
42 42
43/* Unlocked - don't care if this is a bit off */
44int nsegfaults = 0;
45
46struct {
47 unsigned long address;
48 int is_write;
49 int pid;
50 unsigned long sp;
51 int is_user;
52} segfault_record[1024];
53
54void segv_handler(int sig, union uml_pt_regs *regs) 43void segv_handler(int sig, union uml_pt_regs *regs)
55{ 44{
56 int index, max;
57 struct faultinfo * fi = UPT_FAULTINFO(regs); 45 struct faultinfo * fi = UPT_FAULTINFO(regs);
58 46
59 if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ 47 if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
60 bad_segv(*fi, UPT_IP(regs)); 48 bad_segv(*fi, UPT_IP(regs));
61 return; 49 return;
62 } 50 }
63 max = sizeof(segfault_record)/sizeof(segfault_record[0]);
64 index = next_trap_index(max);
65
66 nsegfaults++;
67 segfault_record[index].address = FAULT_ADDRESS(*fi);
68 segfault_record[index].pid = os_getpid();
69 segfault_record[index].is_write = FAULT_WRITE(*fi);
70 segfault_record[index].sp = UPT_SP(regs);
71 segfault_record[index].is_user = UPT_IS_USER(regs);
72 segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); 51 segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
73} 52}
74 53
diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c
index 2650a628719e..3d29c90514cc 100644
--- a/arch/um/kernel/tt/syscall_kern.c
+++ b/arch/um/kernel/tt/syscall_kern.c
@@ -12,36 +12,41 @@
12#include "asm/uaccess.h" 12#include "asm/uaccess.h"
13#include "asm/stat.h" 13#include "asm/stat.h"
14#include "sysdep/syscalls.h" 14#include "sysdep/syscalls.h"
15#include "sysdep/sigcontext.h"
15#include "kern_util.h" 16#include "kern_util.h"
17#include "syscall.h"
16 18
17extern syscall_handler_t *sys_call_table[]; 19void syscall_handler_tt(int sig, struct pt_regs *regs)
18
19long execute_syscall_tt(void *r)
20{ 20{
21 struct pt_regs *regs = r; 21 void *sc;
22 long res; 22 long result;
23 int syscall; 23 int syscall;
24
25#ifdef CONFIG_SYSCALL_DEBUG 24#ifdef CONFIG_SYSCALL_DEBUG
25 int index;
26 index = record_syscall_start(syscall);
27#endif
28 sc = UPT_SC(&regs->regs);
29 SC_START_SYSCALL(sc);
30
31 syscall_trace(&regs->regs, 0);
32
26 current->thread.nsyscalls++; 33 current->thread.nsyscalls++;
27 nsyscalls++; 34 nsyscalls++;
28#endif
29 syscall = UPT_SYSCALL_NR(&regs->regs); 35 syscall = UPT_SYSCALL_NR(&regs->regs);
30 36
31 if((syscall >= NR_syscalls) || (syscall < 0)) 37 if((syscall >= NR_syscalls) || (syscall < 0))
32 res = -ENOSYS; 38 result = -ENOSYS;
33 else res = EXECUTE_SYSCALL(syscall, regs); 39 else result = EXECUTE_SYSCALL(syscall, regs);
34 40
35 return(res); 41 /* regs->sc may have changed while the system call ran (there may
36} 42 * have been an interrupt or segfault), so it needs to be refreshed.
43 */
44 UPT_SC(&regs->regs) = sc;
37 45
38/* 46 SC_SET_SYSCALL_RETURN(sc, result);
39 * Overrides for Emacs so that we follow Linus's tabbing style. 47
40 * Emacs will notice this stuff at the end of the file and automatically 48 syscall_trace(&regs->regs, 1);
41 * adjust the settings for this buffer only. This must remain at the end 49#ifdef CONFIG_SYSCALL_DEBUG
42 * of the file. 50 record_syscall_end(index, result);
43 * --------------------------------------------------------------------------- 51#endif
44 * Local variables: 52}
45 * c-file-style: "linux"
46 * End:
47 */
diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c
index b218316cfdb2..902987bf379b 100644
--- a/arch/um/kernel/tt/syscall_user.c
+++ b/arch/um/kernel/tt/syscall_user.c
@@ -13,42 +13,9 @@
13#include "task.h" 13#include "task.h"
14#include "user_util.h" 14#include "user_util.h"
15#include "kern_util.h" 15#include "kern_util.h"
16#include "syscall_user.h" 16#include "syscall.h"
17#include "tt.h" 17#include "tt.h"
18 18
19
20void syscall_handler_tt(int sig, union uml_pt_regs *regs)
21{
22 void *sc;
23 long result;
24 int syscall;
25#ifdef UML_CONFIG_DEBUG_SYSCALL
26 int index;
27#endif
28
29 syscall = UPT_SYSCALL_NR(regs);
30 sc = UPT_SC(regs);
31 SC_START_SYSCALL(sc);
32
33#ifdef UML_CONFIG_DEBUG_SYSCALL
34 index = record_syscall_start(syscall);
35#endif
36 syscall_trace(regs, 0);
37 result = execute_syscall_tt(regs);
38
39 /* regs->sc may have changed while the system call ran (there may
40 * have been an interrupt or segfault), so it needs to be refreshed.
41 */
42 UPT_SC(regs) = sc;
43
44 SC_SET_SYSCALL_RETURN(sc, result);
45
46 syscall_trace(regs, 1);
47#ifdef UML_CONFIG_DEBUG_SYSCALL
48 record_syscall_end(index, result);
49#endif
50}
51
52void do_sigtrap(void *task) 19void do_sigtrap(void *task)
53{ 20{
54 UPT_SYSCALL_NR(TASK_REGS(task)) = -1; 21 UPT_SYSCALL_NR(TASK_REGS(task)) = -1;
diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
index 2eefb43bc9c2..f1d85dbb45b9 100644
--- a/arch/um/kernel/tt/tlb.c
+++ b/arch/um/kernel/tt/tlb.c
@@ -17,25 +17,31 @@
17#include "os.h" 17#include "os.h"
18#include "tlb.h" 18#include "tlb.h"
19 19
20static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) 20static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
21 int finished, void **flush)
21{ 22{
22 struct host_vm_op *op; 23 struct host_vm_op *op;
23 int i; 24 int i, ret=0;
24 25
25 for(i = 0; i <= last; i++){ 26 for(i = 0; i <= last && !ret; i++){
26 op = &ops[i]; 27 op = &ops[i];
27 switch(op->type){ 28 switch(op->type){
28 case MMAP: 29 case MMAP:
29 os_map_memory((void *) op->u.mmap.addr, op->u.mmap.fd, 30 ret = os_map_memory((void *) op->u.mmap.addr,
30 op->u.mmap.offset, op->u.mmap.len, 31 op->u.mmap.fd, op->u.mmap.offset,
31 op->u.mmap.r, op->u.mmap.w, 32 op->u.mmap.len, op->u.mmap.r,
32 op->u.mmap.x); 33 op->u.mmap.w, op->u.mmap.x);
33 break; 34 break;
34 case MUNMAP: 35 case MUNMAP:
35 os_unmap_memory((void *) op->u.munmap.addr, 36 ret = os_unmap_memory((void *) op->u.munmap.addr,
36 op->u.munmap.len); 37 op->u.munmap.len);
37 break; 38 break;
38 case MPROTECT: 39 case MPROTECT:
40 ret = protect_memory(op->u.mprotect.addr,
41 op->u.munmap.len,
42 op->u.mprotect.r,
43 op->u.mprotect.w,
44 op->u.mprotect.x, 1);
39 protect_memory(op->u.mprotect.addr, op->u.munmap.len, 45 protect_memory(op->u.mprotect.addr, op->u.munmap.len,
40 op->u.mprotect.r, op->u.mprotect.w, 46 op->u.mprotect.r, op->u.mprotect.w,
41 op->u.mprotect.x, 1); 47 op->u.mprotect.x, 1);
@@ -45,6 +51,8 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last)
45 break; 51 break;
46 } 52 }
47 } 53 }
54
55 return ret;
48} 56}
49 57
50static void fix_range(struct mm_struct *mm, unsigned long start_addr, 58static void fix_range(struct mm_struct *mm, unsigned long start_addr,
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index ca2bb6f09a7d..09f6f7ce4695 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -126,7 +126,7 @@ unsigned long start_vm;
126unsigned long end_vm; 126unsigned long end_vm;
127int ncpus = 1; 127int ncpus = 1;
128 128
129#ifdef CONFIG_MODE_TT 129#ifdef CONFIG_CMDLINE_ON_HOST
130/* Pointer set in linux_main, the array itself is private to each thread, 130/* Pointer set in linux_main, the array itself is private to each thread,
131 * and changed at address space creation time so this poses no concurrency 131 * and changed at address space creation time so this poses no concurrency
132 * problems. 132 * problems.
@@ -141,7 +141,7 @@ long physmem_size = 32 * 1024 * 1024;
141 141
142void set_cmdline(char *cmd) 142void set_cmdline(char *cmd)
143{ 143{
144#ifdef CONFIG_MODE_TT 144#ifdef CONFIG_CMDLINE_ON_HOST
145 char *umid, *ptr; 145 char *umid, *ptr;
146 146
147 if(CHOOSE_MODE(honeypot, 0)) return; 147 if(CHOOSE_MODE(honeypot, 0)) return;
@@ -333,6 +333,7 @@ int linux_main(int argc, char **argv)
333 if(have_root == 0) 333 if(have_root == 0)
334 add_arg(DEFAULT_COMMAND_LINE); 334 add_arg(DEFAULT_COMMAND_LINE);
335 335
336 os_early_checks();
336 mode_tt = force_tt ? 1 : !can_do_skas(); 337 mode_tt = force_tt ? 1 : !can_do_skas();
337#ifndef CONFIG_MODE_TT 338#ifndef CONFIG_MODE_TT
338 if (mode_tt) { 339 if (mode_tt) {
@@ -385,7 +386,7 @@ int linux_main(int argc, char **argv)
385 386
386 setup_machinename(system_utsname.machine); 387 setup_machinename(system_utsname.machine);
387 388
388#ifdef CONFIG_MODE_TT 389#ifdef CONFIG_CMDLINE_ON_HOST
389 argv1_begin = argv[1]; 390 argv1_begin = argv[1];
390 argv1_end = &argv[1][strlen(argv[1])]; 391 argv1_end = &argv[1][strlen(argv[1])];
391#endif 392#endif
@@ -470,7 +471,6 @@ void __init setup_arch(char **cmdline_p)
470void __init check_bugs(void) 471void __init check_bugs(void)
471{ 472{
472 arch_check_bugs(); 473 arch_check_bugs();
473 check_ptrace();
474 check_sigio(); 474 check_sigio();
475 check_devanon(); 475 check_devanon();
476} 476}