diff options
author | Jeff Dike <jdike@addtoit.com> | 2007-05-06 17:51:32 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 15:13:03 -0400 |
commit | 3d564047a5f45cb628ec72514f68076e532988f3 (patch) | |
tree | 3a4247baed8e66bfe5d159f058a88c1a5b7e7ed1 /arch/um/kernel | |
parent | f9d6e5f83b40d8ff73a74d4bba2c5f51d6048b12 (diff) |
uml: start fixing os_read_file and os_write_file
This patch starts the removal of a very old, very broken piece of code. The
code stems from the problem of passing a userspace buffer into read() or
write() on the host. If that buffer has not yet been faulted in, read() and
write() will return -EFAULT.
To avoid this problem, the buffer was faulted in before the system call by
touching each page that holds it, doing a copy-user of one byte to each page.
This is obviously bogus, but it does usually work in tt mode, since the kernel
and process are in the same address space and userspace addresses can be
accessed directly in the kernel.
In skas mode, where the kernel and process are in separate address spaces, it
is completely bogus because the userspace address, which is invalid in the
kernel, is passed into the system call instead of the corresponding physical
address, which would be valid. Here, it appears that this code, on every host
read() or write(), tries to fault in a random process page. This doesn't seem
to cause any correctness problems, but there is a performance impact. This
patch, and the ones following, result in a 10-15% performance gain on a kernel
build.
This code can't be immediately tossed out because when it is, you can't log
in. Apparently, there is some code in the console driver which depends on
this somehow.
However, we can start removing it by switching the code which does I/O using
kernel addresses to using plain read() and write(). This patch introduces
os_read_file_k and os_write_file_k for use with kernel buffers and converts
all call sites which use obvious kernel buffers to the new functions. These
include I/O using buffers which are local variables on the stack or are
kmalloc-ed. Later patches will handle the less obvious cases, followed by a
mass conversion back to the original interface.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- | arch/um/kernel/ksyms.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/physmem.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/sigio.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/smp.c | 12 | ||||
-rw-r--r-- | arch/um/kernel/tt/process_kern.c | 7 | ||||
-rw-r--r-- | arch/um/kernel/tt/ptproxy/proxy.c | 9 | ||||
-rw-r--r-- | arch/um/kernel/tt/tracer.c | 2 |
7 files changed, 20 insertions, 16 deletions
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 7b3e53fb8070..7c158448b9fd 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c | |||
@@ -62,7 +62,9 @@ EXPORT_SYMBOL(os_get_exec_close); | |||
62 | EXPORT_SYMBOL(os_set_exec_close); | 62 | EXPORT_SYMBOL(os_set_exec_close); |
63 | EXPORT_SYMBOL(os_getpid); | 63 | EXPORT_SYMBOL(os_getpid); |
64 | EXPORT_SYMBOL(os_open_file); | 64 | EXPORT_SYMBOL(os_open_file); |
65 | EXPORT_SYMBOL(os_read_file_k); | ||
65 | EXPORT_SYMBOL(os_read_file); | 66 | EXPORT_SYMBOL(os_read_file); |
67 | EXPORT_SYMBOL(os_write_file_k); | ||
66 | EXPORT_SYMBOL(os_write_file); | 68 | EXPORT_SYMBOL(os_write_file); |
67 | EXPORT_SYMBOL(os_seek_file); | 69 | EXPORT_SYMBOL(os_seek_file); |
68 | EXPORT_SYMBOL(os_lock_file); | 70 | EXPORT_SYMBOL(os_lock_file); |
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index df1ad3ba130c..a9856209006b 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c | |||
@@ -341,7 +341,7 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, | |||
341 | * from physmem_fd, so it needs to be written out there. | 341 | * from physmem_fd, so it needs to be written out there. |
342 | */ | 342 | */ |
343 | os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); | 343 | os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); |
344 | os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); | 344 | os_write_file_k(physmem_fd, &__syscall_stub_start, PAGE_SIZE); |
345 | 345 | ||
346 | bootmap_size = init_bootmem(pfn, pfn + delta); | 346 | bootmap_size = init_bootmem(pfn, pfn + delta); |
347 | free_bootmem(__pa(reserve_end) + bootmap_size, | 347 | free_bootmem(__pa(reserve_end) + bootmap_size, |
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 89f9866a1354..f756e78085e4 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c | |||
@@ -21,7 +21,7 @@ static irqreturn_t sigio_interrupt(int irq, void *data) | |||
21 | { | 21 | { |
22 | char c; | 22 | char c; |
23 | 23 | ||
24 | os_read_file(sigio_irq_fd, &c, sizeof(c)); | 24 | os_read_file_k(sigio_irq_fd, &c, sizeof(c)); |
25 | reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); | 25 | reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); |
26 | return IRQ_HANDLED; | 26 | return IRQ_HANDLED; |
27 | } | 27 | } |
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 62dd093cbcd7..47b690893c06 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c | |||
@@ -47,7 +47,7 @@ struct task_struct *idle_threads[NR_CPUS]; | |||
47 | 47 | ||
48 | void smp_send_reschedule(int cpu) | 48 | void smp_send_reschedule(int cpu) |
49 | { | 49 | { |
50 | os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); | 50 | os_write_file_k(cpu_data[cpu].ipi_pipe[1], "R", 1); |
51 | num_reschedules_sent++; | 51 | num_reschedules_sent++; |
52 | } | 52 | } |
53 | 53 | ||
@@ -59,7 +59,7 @@ void smp_send_stop(void) | |||
59 | for(i = 0; i < num_online_cpus(); i++){ | 59 | for(i = 0; i < num_online_cpus(); i++){ |
60 | if(i == current_thread->cpu) | 60 | if(i == current_thread->cpu) |
61 | continue; | 61 | continue; |
62 | os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); | 62 | os_write_file_k(cpu_data[i].ipi_pipe[1], "S", 1); |
63 | } | 63 | } |
64 | printk("done\n"); | 64 | printk("done\n"); |
65 | } | 65 | } |
@@ -108,8 +108,8 @@ static struct task_struct *idle_thread(int cpu) | |||
108 | { .pid = new_task->thread.mode.tt.extern_pid, | 108 | { .pid = new_task->thread.mode.tt.extern_pid, |
109 | .task = new_task } ); | 109 | .task = new_task } ); |
110 | idle_threads[cpu] = new_task; | 110 | idle_threads[cpu] = new_task; |
111 | CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, | 111 | CHOOSE_MODE(os_write_file_k(new_task->thread.mode.tt.switch_pipe[1], &c, |
112 | sizeof(c)), | 112 | sizeof(c)), |
113 | ({ panic("skas mode doesn't support SMP"); })); | 113 | ({ panic("skas mode doesn't support SMP"); })); |
114 | return(new_task); | 114 | return(new_task); |
115 | } | 115 | } |
@@ -179,7 +179,7 @@ void IPI_handler(int cpu) | |||
179 | int fd; | 179 | int fd; |
180 | 180 | ||
181 | fd = cpu_data[cpu].ipi_pipe[0]; | 181 | fd = cpu_data[cpu].ipi_pipe[0]; |
182 | while (os_read_file(fd, &c, 1) == 1) { | 182 | while (os_read_file_k(fd, &c, 1) == 1) { |
183 | switch (c) { | 183 | switch (c) { |
184 | case 'C': | 184 | case 'C': |
185 | smp_call_function_slave(cpu); | 185 | smp_call_function_slave(cpu); |
@@ -239,7 +239,7 @@ int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, | |||
239 | info = _info; | 239 | info = _info; |
240 | 240 | ||
241 | for_each_online_cpu(i) | 241 | for_each_online_cpu(i) |
242 | os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); | 242 | os_write_file_k(cpu_data[i].ipi_pipe[1], "C", 1); |
243 | 243 | ||
244 | while (atomic_read(&scf_started) != cpus) | 244 | while (atomic_read(&scf_started) != cpus) |
245 | barrier(); | 245 | barrier(); |
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index 8029f72afaa7..c81bd2074930 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c | |||
@@ -57,14 +57,15 @@ void switch_to_tt(void *prev, void *next) | |||
57 | * nor the value in "to" (since it was the task which stole us the CPU, | 57 | * nor the value in "to" (since it was the task which stole us the CPU, |
58 | * which we don't care about). */ | 58 | * which we don't care about). */ |
59 | 59 | ||
60 | err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); | 60 | err = os_write_file_k(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); |
61 | if(err != sizeof(c)) | 61 | if(err != sizeof(c)) |
62 | panic("write of switch_pipe failed, err = %d", -err); | 62 | panic("write of switch_pipe failed, err = %d", -err); |
63 | 63 | ||
64 | if(from->thread.mode.tt.switch_pipe[0] == -1) | 64 | if(from->thread.mode.tt.switch_pipe[0] == -1) |
65 | os_kill_process(os_getpid(), 0); | 65 | os_kill_process(os_getpid(), 0); |
66 | 66 | ||
67 | err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); | 67 | err = os_read_file_k(from->thread.mode.tt.switch_pipe[0], &c, |
68 | sizeof(c)); | ||
68 | if(err != sizeof(c)) | 69 | if(err != sizeof(c)) |
69 | panic("read of switch_pipe failed, errno = %d", -err); | 70 | panic("read of switch_pipe failed, errno = %d", -err); |
70 | 71 | ||
@@ -113,7 +114,7 @@ void suspend_new_thread(int fd) | |||
113 | char c; | 114 | char c; |
114 | 115 | ||
115 | os_stop_process(os_getpid()); | 116 | os_stop_process(os_getpid()); |
116 | err = os_read_file(fd, &c, sizeof(c)); | 117 | err = os_read_file_k(fd, &c, sizeof(c)); |
117 | if(err != sizeof(c)) | 118 | if(err != sizeof(c)) |
118 | panic("read failed in suspend_new_thread, err = %d", -err); | 119 | panic("read failed in suspend_new_thread, err = %d", -err); |
119 | } | 120 | } |
diff --git a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c index c88e7b5d8a76..007beb6b7c00 100644 --- a/arch/um/kernel/tt/ptproxy/proxy.c +++ b/arch/um/kernel/tt/ptproxy/proxy.c | |||
@@ -338,13 +338,14 @@ int start_debugger(char *prog, int startup, int stop, int *fd_out) | |||
338 | "err = %d\n", -fd); | 338 | "err = %d\n", -fd); |
339 | exit(1); | 339 | exit(1); |
340 | } | 340 | } |
341 | os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); | 341 | os_write_file_k(fd, gdb_init_string, |
342 | sizeof(gdb_init_string) - 1); | ||
342 | if(startup){ | 343 | if(startup){ |
343 | if(stop){ | 344 | if(stop){ |
344 | os_write_file(fd, "b start_kernel\n", | 345 | os_write_file_k(fd, "b start_kernel\n", |
345 | strlen("b start_kernel\n")); | 346 | strlen("b start_kernel\n")); |
346 | } | 347 | } |
347 | os_write_file(fd, "c\n", strlen("c\n")); | 348 | os_write_file_k(fd, "c\n", strlen("c\n")); |
348 | } | 349 | } |
349 | if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ | 350 | if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ |
350 | printk("start_debugger : PTRACE_TRACEME failed, " | 351 | printk("start_debugger : PTRACE_TRACEME failed, " |
diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c index c23588393f6e..264da6c5a5c3 100644 --- a/arch/um/kernel/tt/tracer.c +++ b/arch/um/kernel/tt/tracer.c | |||
@@ -44,7 +44,7 @@ static void tracer_winch_handler(int sig) | |||
44 | int n; | 44 | int n; |
45 | char c = 1; | 45 | char c = 1; |
46 | 46 | ||
47 | n = os_write_file(tracer_winch[1], &c, sizeof(c)); | 47 | n = os_write_file_k(tracer_winch[1], &c, sizeof(c)); |
48 | if(n != sizeof(c)) | 48 | if(n != sizeof(c)) |
49 | printk("tracer_winch_handler - write failed, err = %d\n", -n); | 49 | printk("tracer_winch_handler - write failed, err = %d\n", -n); |
50 | } | 50 | } |