aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Dike <jdike@addtoit.com>2005-09-03 18:57:45 -0400
committerLinus Torvalds <torvalds@evo.osdl.org>2005-09-05 03:06:23 -0400
commit75e5584c89d213d6089f64f22cd899fb172e4c95 (patch)
tree22bb81b9c699e06b3c8163933654fe3f84ae469d
parent30f7dabb083f8ff4ce541b5ac4e5d70cc173051a (diff)
[PATCH] uml: use host AIO support
This patch makes UML use host AIO support when both it and /usr/include/linux/aio_abi.h are present. This is only the support, with no consumers - a consumer is coming in the next patch. Signed-off-by: Jeff Dike <jdike@addtoit.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/um/include/aio.h28
-rw-r--r--arch/um/include/init.h10
-rw-r--r--arch/um/include/irq_kern.h3
-rw-r--r--arch/um/kernel/irq.c41
-rw-r--r--arch/um/os-Linux/Makefile10
-rw-r--r--arch/um/os-Linux/aio.c398
6 files changed, 475 insertions, 15 deletions
diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h
new file mode 100644
index 000000000000..423bae9153f8
--- /dev/null
+++ b/arch/um/include/aio.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef AIO_H__
7#define AIO_H__
8
/* The kinds of request that can be handed to submit_aio(). */
enum aio_type {
	AIO_READ,
	AIO_WRITE,
	AIO_MMAP
};
10
/* Completion record written to a request's reply descriptor. */
struct aio_thread_reply {
	/* The struct aio_context pointer that was passed in with the request */
	void *data;
	/* Result reported for the request */
	int err;
};
15
/* Per-request state supplied by the caller of submit_aio(). */
struct aio_context {
	/* Descriptor the completion record is written to; filled in by
	 * submit_aio().
	 */
	int reply_fd;
	/* Link to another context; not used by the AIO code itself */
	struct aio_context *next;
};
20
21#define INIT_AIO_CONTEXT { .reply_fd = -1, \
22 .next = NULL }
23
24extern int submit_aio(enum aio_type type, int fd, char *buf, int len,
25 unsigned long long offset, int reply_fd,
26 struct aio_context *aio);
27
28#endif
diff --git a/arch/um/include/init.h b/arch/um/include/init.h
index 55c2693f8778..cbd79a8d213d 100644
--- a/arch/um/include/init.h
+++ b/arch/um/include/init.h
@@ -111,7 +111,15 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
111 111
112#ifndef __KERNEL__ 112#ifndef __KERNEL__
113 113
114#define __initcall(fn) static initcall_t __initcall_##fn __init_call = fn 114#define __define_initcall(level,fn) \
115 static initcall_t __initcall_##fn __attribute_used__ \
116 __attribute__((__section__(".initcall" level ".init"))) = fn
117
118/* Userspace initcalls shouldn't depend on anything in the kernel, so we'll
119 * make them run first.
120 */
121#define __initcall(fn) __define_initcall("1", fn)
122
115#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn 123#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
116 124
117#define __init_call __attribute__ ((unused,__section__ (".initcall.init"))) 125#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
index 3af52a634c4c..c222d56b1494 100644
--- a/arch/um/include/irq_kern.h
+++ b/arch/um/include/irq_kern.h
@@ -7,12 +7,15 @@
7#define __IRQ_KERN_H__ 7#define __IRQ_KERN_H__
8 8
9#include "linux/interrupt.h" 9#include "linux/interrupt.h"
10#include "asm/ptrace.h"
10 11
11extern int um_request_irq(unsigned int irq, int fd, int type, 12extern int um_request_irq(unsigned int irq, int fd, int type,
12 irqreturn_t (*handler)(int, void *, 13 irqreturn_t (*handler)(int, void *,
13 struct pt_regs *), 14 struct pt_regs *),
14 unsigned long irqflags, const char * devname, 15 unsigned long irqflags, const char * devname,
15 void *dev_id); 16 void *dev_id);
17extern int init_aio_irq(int irq, char *name,
18 irqreturn_t (*handler)(int, void *, struct pt_regs *));
16 19
17#endif 20#endif
18 21
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9f18061ef4c9..dcd814971995 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -31,7 +31,7 @@
31#include "kern_util.h" 31#include "kern_util.h"
32#include "irq_user.h" 32#include "irq_user.h"
33#include "irq_kern.h" 33#include "irq_kern.h"
34 34#include "os.h"
35 35
36/* 36/*
37 * Generic, controller-independent functions: 37 * Generic, controller-independent functions:
@@ -168,13 +168,32 @@ void __init init_IRQ(void)
168 } 168 }
169} 169}
170 170
171/* 171int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
172 * Overrides for Emacs so that we follow Linus's tabbing style. 172 struct pt_regs *))
173 * Emacs will notice this stuff at the end of the file and automatically 173{
174 * adjust the settings for this buffer only. This must remain at the end 174 int fds[2], err;
175 * of the file. 175
176 * --------------------------------------------------------------------------- 176 err = os_pipe(fds, 1, 1);
177 * Local variables: 177 if(err){
178 * c-file-style: "linux" 178 printk("init_aio_irq - os_pipe failed, err = %d\n", -err);
179 * End: 179 goto out;
180 */ 180 }
181
182 err = um_request_irq(irq, fds[0], IRQ_READ, handler,
183 SA_INTERRUPT | SA_SAMPLE_RANDOM, name,
184 (void *) (long) fds[0]);
185 if(err){
186 printk("init_aio_irq - : um_request_irq failed, err = %d\n",
187 err);
188 goto out_close;
189 }
190
191 err = fds[1];
192 goto out;
193
194 out_close:
195 os_close_file(fds[0]);
196 os_close_file(fds[1]);
197 out:
198 return(err);
199}
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 4ddf540284ce..351d96934679 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -3,11 +3,15 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-y = elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o drivers/ \ 6obj-y = aio.o elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o \
7 sys-$(SUBARCH)/ 7 drivers/ sys-$(SUBARCH)/
8 8
9USER_OBJS := elf_aux.o file.o process.o signal.o time.o tty.o 9USER_OBJS := aio.o elf_aux.o file.o process.o signal.o time.o tty.o
10 10
11CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) 11CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
12 12
13HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \
14 echo -DHAVE_AIO_ABI )
15CFLAGS_aio.o += $(HAVE_AIO_ABI)
16
13include arch/um/scripts/Makefile.rules 17include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
new file mode 100644
index 000000000000..f2ca2992bbd6
--- /dev/null
+++ b/arch/um/os-Linux/aio.c
@@ -0,0 +1,398 @@
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
13#include "helper.h"
14#include "aio.h"
15#include "init.h"
16#include "user.h"
17#include "mode.h"
18
19struct aio_thread_req {
20 enum aio_type type;
21 int io_fd;
22 unsigned long long offset;
23 char *buf;
24 int len;
25 struct aio_context *aio;
26};
27
28static int aio_req_fd_r = -1;
29static int aio_req_fd_w = -1;
30
31#if defined(HAVE_AIO_ABI)
32#include <linux/aio_abi.h>
33
34/* If we have the headers, we are going to build with AIO enabled.
35 * If we don't have aio in libc, we define the necessary stubs here.
36 */
37
38#if !defined(HAVE_AIO_LIBC)
39
/* Local stand-in for the libc io_setup() - invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
44
/* Local stand-in for the libc io_submit() - invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
49
/* Local stand-in for the libc io_getevents() - invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
55
56#endif
57
58/* The AIO_MMAP cases force the mmapped page into memory here
59 * rather than in whatever place first touches the data. I used
60 * to do this by touching the page, but that's delicate because
61 * gcc is prone to optimizing that away. So, what's done here
62 * is we read from the descriptor from which the page was
63 * mapped. The caller is required to pass an offset which is
64 * inside the page that was mapped. Thus, when the read
65 * returns, we know that the page is in the page cache, and
66 * that it now backs the mmapped area.
67 */
68
69static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
70 int len, unsigned long long offset, struct aio_context *aio)
71{
72 struct iocb iocb, *iocbp = &iocb;
73 char c;
74 int err;
75
76 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
77 .aio_reqprio = 0,
78 .aio_fildes = fd,
79 .aio_buf = (unsigned long) buf,
80 .aio_nbytes = len,
81 .aio_offset = offset,
82 .aio_reserved1 = 0,
83 .aio_reserved2 = 0,
84 .aio_reserved3 = 0 });
85
86 switch(type){
87 case AIO_READ:
88 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_WRITE:
92 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
93 err = io_submit(ctx, 1, &iocbp);
94 break;
95 case AIO_MMAP:
96 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
97 iocb.aio_buf = (unsigned long) &c;
98 iocb.aio_nbytes = sizeof(c);
99 err = io_submit(ctx, 1, &iocbp);
100 break;
101 default:
102 printk("Bogus op in do_aio - %d\n", type);
103 err = -EINVAL;
104 break;
105 }
106 if(err > 0)
107 err = 0;
108
109 return err;
110}
111
112static aio_context_t ctx = 0;
113
114static int aio_thread(void *arg)
115{
116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
119
120 signal(SIGWINCH, SIG_IGN);
121
122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
135 err = os_write_file(reply_fd, &reply, sizeof(reply));
136 if(err != sizeof(reply))
137 printk("not_aio_thread - write failed, "
138 "fd = %d, err = %d\n",
139 aio_req_fd_r, -err);
140 }
141 }
142 return 0;
143}
144
145#endif
146
147static int do_not_aio(struct aio_thread_req *req)
148{
149 char c;
150 int err;
151
152 switch(req->type){
153 case AIO_READ:
154 err = os_seek_file(req->io_fd, req->offset);
155 if(err)
156 goto out;
157
158 err = os_read_file(req->io_fd, req->buf, req->len);
159 break;
160 case AIO_WRITE:
161 err = os_seek_file(req->io_fd, req->offset);
162 if(err)
163 goto out;
164
165 err = os_write_file(req->io_fd, req->buf, req->len);
166 break;
167 case AIO_MMAP:
168 err = os_seek_file(req->io_fd, req->offset);
169 if(err)
170 goto out;
171
172 err = os_read_file(req->io_fd, &c, sizeof(c));
173 break;
174 default:
175 printk("do_not_aio - bad request type : %d\n", req->type);
176 err = -EINVAL;
177 break;
178 }
179
180 out:
181 return err;
182}
183
184static int not_aio_thread(void *arg)
185{
186 struct aio_thread_req req;
187 struct aio_thread_reply reply;
188 int err;
189
190 signal(SIGWINCH, SIG_IGN);
191 while(1){
192 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
193 if(err != sizeof(req)){
194 if(err < 0)
195 printk("not_aio_thread - read failed, "
196 "fd = %d, err = %d\n", aio_req_fd_r,
197 -err);
198 else {
199 printk("not_aio_thread - short read, fd = %d, "
200 "length = %d\n", aio_req_fd_r, err);
201 }
202 continue;
203 }
204 err = do_not_aio(&req);
205 reply = ((struct aio_thread_reply) { .data = req.aio,
206 .err = err });
207 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
208 if(err != sizeof(reply))
209 printk("not_aio_thread - write failed, fd = %d, "
210 "err = %d\n", aio_req_fd_r, -err);
211 }
212}
213
214static int aio_pid = -1;
215
216static int init_aio_24(void)
217{
218 unsigned long stack;
219 int fds[2], err;
220
221 err = os_pipe(fds, 1, 1);
222 if(err)
223 goto out;
224
225 aio_req_fd_w = fds[0];
226 aio_req_fd_r = fds[1];
227 err = run_helper_thread(not_aio_thread, NULL,
228 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
229 if(err < 0)
230 goto out_close_pipe;
231
232 aio_pid = err;
233 goto out;
234
235 out_close_pipe:
236 os_close_file(fds[0]);
237 os_close_file(fds[1]);
238 aio_req_fd_w = -1;
239 aio_req_fd_r = -1;
240 out:
241#ifndef HAVE_AIO_ABI
242 printk("/usr/include/linux/aio_abi.h not present during build\n");
243#endif
244 printk("2.6 host AIO support not used - falling back to I/O "
245 "thread\n");
246 return 0;
247}
248
249#ifdef HAVE_AIO_ABI
250#define DEFAULT_24_AIO 0
251static int init_aio_26(void)
252{
253 unsigned long stack;
254 int err;
255
256 if(io_setup(256, &ctx)){
257 printk("aio_thread failed to initialize context, err = %d\n",
258 errno);
259 return -errno;
260 }
261
262 err = run_helper_thread(aio_thread, NULL,
263 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
264 if(err < 0)
265 return -errno;
266
267 aio_pid = err;
268
269 printk("Using 2.6 host AIO\n");
270 return 0;
271}
272
273static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
274 unsigned long long offset, struct aio_context *aio)
275{
276 struct aio_thread_reply reply;
277 int err;
278
279 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
280 if(err){
281 reply = ((struct aio_thread_reply) { .data = aio,
282 .err = err });
283 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
284 if(err != sizeof(reply))
285 printk("submit_aio_26 - write failed, "
286 "fd = %d, err = %d\n", aio->reply_fd, -err);
287 else err = 0;
288 }
289
290 return err;
291}
292
293#else
294#define DEFAULT_24_AIO 1
/* Placeholder for builds without HAVE_AIO_ABI - 2.6 host AIO cannot be
 * initialized, so always fail with -ENOSYS.
 */
static int init_aio_26(void)
{
	int unsupported = -ENOSYS;

	return unsupported;
}
299
300static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
301 unsigned long long offset, struct aio_context *aio)
302{
303 return -ENOSYS;
304}
305#endif
306
307static int aio_24 = DEFAULT_24_AIO;
308
309static int __init set_aio_24(char *name, int *add)
310{
311 aio_24 = 1;
312 return 0;
313}
314
315__uml_setup("aio=2.4", set_aio_24,
316"aio=2.4\n"
317" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
318" available. 2.4 AIO is a single thread that handles one request at a\n"
319" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
320" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
321" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
322" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
323" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
324" your /usr/include/linux in order to build an AIO-capable UML\n\n"
325);
326
327static int init_aio(void)
328{
329 int err;
330
331 CHOOSE_MODE(({
332 if(!aio_24){
333 printk("Disabling 2.6 AIO in tt mode\n");
334 aio_24 = 1;
335 } }), (void) 0);
336
337 if(!aio_24){
338 err = init_aio_26();
339 if(err && (errno == ENOSYS)){
340 printk("2.6 AIO not supported on the host - "
341 "reverting to 2.4 AIO\n");
342 aio_24 = 1;
343 }
344 else return err;
345 }
346
347 if(aio_24)
348 return init_aio_24();
349
350 return 0;
351}
352
353/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
354 * needs to be called when the kernel is running because it calls run_helper,
355 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
356 * kernel does not run __exitcalls on shutdown, and can't because many of them
357 * break when called outside of module unloading.
358 */
359__initcall(init_aio);
360
361static void exit_aio(void)
362{
363 if(aio_pid != -1)
364 os_kill_process(aio_pid, 1);
365}
366
367__uml_exitcall(exit_aio);
368
369static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
370 unsigned long long offset, struct aio_context *aio)
371{
372 struct aio_thread_req req = { .type = type,
373 .io_fd = io_fd,
374 .offset = offset,
375 .buf = buf,
376 .len = len,
377 .aio = aio,
378 };
379 int err;
380
381 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
382 if(err == sizeof(req))
383 err = 0;
384
385 return err;
386}
387
388int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
389 unsigned long long offset, int reply_fd,
390 struct aio_context *aio)
391{
392 aio->reply_fd = reply_fd;
393 if(aio_24)
394 return submit_aio_24(type, io_fd, buf, len, offset, aio);
395 else {
396 return submit_aio_26(type, io_fd, buf, len, offset, aio);
397 }
398}