diff options
author | Jeff Dike <jdike@addtoit.com> | 2005-09-03 18:57:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-09-05 03:06:23 -0400 |
commit | 75e5584c89d213d6089f64f22cd899fb172e4c95 (patch) | |
tree | 22bb81b9c699e06b3c8163933654fe3f84ae469d /arch/um/os-Linux/aio.c | |
parent | 30f7dabb083f8ff4ce541b5ac4e5d70cc173051a (diff) |
[PATCH] uml: use host AIO support
This patch makes UML use host AIO support when both it and
/usr/include/linux/aio_abi.h are present. This patch adds only the support, with no
consumers - a consumer is coming in the next patch.
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/um/os-Linux/aio.c')
-rw-r--r-- | arch/um/os-Linux/aio.c | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c new file mode 100644 index 000000000000..f2ca2992bbd6 --- /dev/null +++ b/arch/um/os-Linux/aio.c | |||
@@ -0,0 +1,398 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) | ||
3 | * Licensed under the GPL | ||
4 | */ | ||
5 | |||
6 | #include <stdlib.h> | ||
7 | #include <unistd.h> | ||
8 | #include <signal.h> | ||
9 | #include <errno.h> | ||
10 | #include <sched.h> | ||
11 | #include <sys/syscall.h> | ||
12 | #include "os.h" | ||
13 | #include "helper.h" | ||
14 | #include "aio.h" | ||
15 | #include "init.h" | ||
16 | #include "user.h" | ||
17 | #include "mode.h" | ||
18 | |||
19 | struct aio_thread_req { | ||
20 | enum aio_type type; | ||
21 | int io_fd; | ||
22 | unsigned long long offset; | ||
23 | char *buf; | ||
24 | int len; | ||
25 | struct aio_context *aio; | ||
26 | }; | ||
27 | |||
/*
 * The two descriptors of the request channel set up by init_aio_24():
 * submit_aio_24() writes requests to aio_req_fd_w and not_aio_thread()
 * reads them from aio_req_fd_r.  Both are -1 while no channel exists.
 */
static int aio_req_fd_r = -1, aio_req_fd_w = -1;
30 | |||
31 | #if defined(HAVE_AIO_ABI) | ||
32 | #include <linux/aio_abi.h> | ||
33 | |||
34 | /* If we have the headers, we are going to build with AIO enabled. | ||
35 | * If we don't have aio in libc, we define the necessary stubs here. | ||
36 | */ | ||
37 | |||
38 | #if !defined(HAVE_AIO_LIBC) | ||
39 | |||
40 | static long io_setup(int n, aio_context_t *ctxp) | ||
41 | { | ||
42 | return syscall(__NR_io_setup, n, ctxp); | ||
43 | } | ||
44 | |||
45 | static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) | ||
46 | { | ||
47 | return syscall(__NR_io_submit, ctx, nr, iocbpp); | ||
48 | } | ||
49 | |||
50 | static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, | ||
51 | struct io_event *events, struct timespec *timeout) | ||
52 | { | ||
53 | return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); | ||
54 | } | ||
55 | |||
56 | #endif | ||
57 | |||
58 | /* The AIO_MMAP cases force the mmapped page into memory here | ||
59 | * rather than in whatever place first touches the data. I used | ||
60 | * to do this by touching the page, but that's delicate because | ||
61 | * gcc is prone to optimizing that away. So, what's done here | ||
62 | * is we read from the descriptor from which the page was | ||
63 | * mapped. The caller is required to pass an offset which is | ||
64 | * inside the page that was mapped. Thus, when the read | ||
65 | * returns, we know that the page is in the page cache, and | ||
66 | * that it now backs the mmapped area. | ||
67 | */ | ||
68 | |||
69 | static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, | ||
70 | int len, unsigned long long offset, struct aio_context *aio) | ||
71 | { | ||
72 | struct iocb iocb, *iocbp = &iocb; | ||
73 | char c; | ||
74 | int err; | ||
75 | |||
76 | iocb = ((struct iocb) { .aio_data = (unsigned long) aio, | ||
77 | .aio_reqprio = 0, | ||
78 | .aio_fildes = fd, | ||
79 | .aio_buf = (unsigned long) buf, | ||
80 | .aio_nbytes = len, | ||
81 | .aio_offset = offset, | ||
82 | .aio_reserved1 = 0, | ||
83 | .aio_reserved2 = 0, | ||
84 | .aio_reserved3 = 0 }); | ||
85 | |||
86 | switch(type){ | ||
87 | case AIO_READ: | ||
88 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | ||
89 | err = io_submit(ctx, 1, &iocbp); | ||
90 | break; | ||
91 | case AIO_WRITE: | ||
92 | iocb.aio_lio_opcode = IOCB_CMD_PWRITE; | ||
93 | err = io_submit(ctx, 1, &iocbp); | ||
94 | break; | ||
95 | case AIO_MMAP: | ||
96 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | ||
97 | iocb.aio_buf = (unsigned long) &c; | ||
98 | iocb.aio_nbytes = sizeof(c); | ||
99 | err = io_submit(ctx, 1, &iocbp); | ||
100 | break; | ||
101 | default: | ||
102 | printk("Bogus op in do_aio - %d\n", type); | ||
103 | err = -EINVAL; | ||
104 | break; | ||
105 | } | ||
106 | if(err > 0) | ||
107 | err = 0; | ||
108 | |||
109 | return err; | ||
110 | } | ||
111 | |||
112 | static aio_context_t ctx = 0; | ||
113 | |||
114 | static int aio_thread(void *arg) | ||
115 | { | ||
116 | struct aio_thread_reply reply; | ||
117 | struct io_event event; | ||
118 | int err, n, reply_fd; | ||
119 | |||
120 | signal(SIGWINCH, SIG_IGN); | ||
121 | |||
122 | while(1){ | ||
123 | n = io_getevents(ctx, 1, 1, &event, NULL); | ||
124 | if(n < 0){ | ||
125 | if(errno == EINTR) | ||
126 | continue; | ||
127 | printk("aio_thread - io_getevents failed, " | ||
128 | "errno = %d\n", errno); | ||
129 | } | ||
130 | else { | ||
131 | reply = ((struct aio_thread_reply) | ||
132 | { .data = (void *) (long) event.data, | ||
133 | .err = event.res }); | ||
134 | reply_fd = ((struct aio_context *) reply.data)->reply_fd; | ||
135 | err = os_write_file(reply_fd, &reply, sizeof(reply)); | ||
136 | if(err != sizeof(reply)) | ||
137 | printk("not_aio_thread - write failed, " | ||
138 | "fd = %d, err = %d\n", | ||
139 | aio_req_fd_r, -err); | ||
140 | } | ||
141 | } | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | #endif | ||
146 | |||
147 | static int do_not_aio(struct aio_thread_req *req) | ||
148 | { | ||
149 | char c; | ||
150 | int err; | ||
151 | |||
152 | switch(req->type){ | ||
153 | case AIO_READ: | ||
154 | err = os_seek_file(req->io_fd, req->offset); | ||
155 | if(err) | ||
156 | goto out; | ||
157 | |||
158 | err = os_read_file(req->io_fd, req->buf, req->len); | ||
159 | break; | ||
160 | case AIO_WRITE: | ||
161 | err = os_seek_file(req->io_fd, req->offset); | ||
162 | if(err) | ||
163 | goto out; | ||
164 | |||
165 | err = os_write_file(req->io_fd, req->buf, req->len); | ||
166 | break; | ||
167 | case AIO_MMAP: | ||
168 | err = os_seek_file(req->io_fd, req->offset); | ||
169 | if(err) | ||
170 | goto out; | ||
171 | |||
172 | err = os_read_file(req->io_fd, &c, sizeof(c)); | ||
173 | break; | ||
174 | default: | ||
175 | printk("do_not_aio - bad request type : %d\n", req->type); | ||
176 | err = -EINVAL; | ||
177 | break; | ||
178 | } | ||
179 | |||
180 | out: | ||
181 | return err; | ||
182 | } | ||
183 | |||
184 | static int not_aio_thread(void *arg) | ||
185 | { | ||
186 | struct aio_thread_req req; | ||
187 | struct aio_thread_reply reply; | ||
188 | int err; | ||
189 | |||
190 | signal(SIGWINCH, SIG_IGN); | ||
191 | while(1){ | ||
192 | err = os_read_file(aio_req_fd_r, &req, sizeof(req)); | ||
193 | if(err != sizeof(req)){ | ||
194 | if(err < 0) | ||
195 | printk("not_aio_thread - read failed, " | ||
196 | "fd = %d, err = %d\n", aio_req_fd_r, | ||
197 | -err); | ||
198 | else { | ||
199 | printk("not_aio_thread - short read, fd = %d, " | ||
200 | "length = %d\n", aio_req_fd_r, err); | ||
201 | } | ||
202 | continue; | ||
203 | } | ||
204 | err = do_not_aio(&req); | ||
205 | reply = ((struct aio_thread_reply) { .data = req.aio, | ||
206 | .err = err }); | ||
207 | err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); | ||
208 | if(err != sizeof(reply)) | ||
209 | printk("not_aio_thread - write failed, fd = %d, " | ||
210 | "err = %d\n", aio_req_fd_r, -err); | ||
211 | } | ||
212 | } | ||
213 | |||
/*
 * Value returned by run_helper_thread() for the helper started in
 * init_aio_24() or init_aio_26(); exit_aio() kills it.  -1 means that no
 * helper has been started.
 */
static int aio_pid = -1;
215 | |||
216 | static int init_aio_24(void) | ||
217 | { | ||
218 | unsigned long stack; | ||
219 | int fds[2], err; | ||
220 | |||
221 | err = os_pipe(fds, 1, 1); | ||
222 | if(err) | ||
223 | goto out; | ||
224 | |||
225 | aio_req_fd_w = fds[0]; | ||
226 | aio_req_fd_r = fds[1]; | ||
227 | err = run_helper_thread(not_aio_thread, NULL, | ||
228 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | ||
229 | if(err < 0) | ||
230 | goto out_close_pipe; | ||
231 | |||
232 | aio_pid = err; | ||
233 | goto out; | ||
234 | |||
235 | out_close_pipe: | ||
236 | os_close_file(fds[0]); | ||
237 | os_close_file(fds[1]); | ||
238 | aio_req_fd_w = -1; | ||
239 | aio_req_fd_r = -1; | ||
240 | out: | ||
241 | #ifndef HAVE_AIO_ABI | ||
242 | printk("/usr/include/linux/aio_abi.h not present during build\n"); | ||
243 | #endif | ||
244 | printk("2.6 host AIO support not used - falling back to I/O " | ||
245 | "thread\n"); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | #ifdef HAVE_AIO_ABI | ||
250 | #define DEFAULT_24_AIO 0 | ||
251 | static int init_aio_26(void) | ||
252 | { | ||
253 | unsigned long stack; | ||
254 | int err; | ||
255 | |||
256 | if(io_setup(256, &ctx)){ | ||
257 | printk("aio_thread failed to initialize context, err = %d\n", | ||
258 | errno); | ||
259 | return -errno; | ||
260 | } | ||
261 | |||
262 | err = run_helper_thread(aio_thread, NULL, | ||
263 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | ||
264 | if(err < 0) | ||
265 | return -errno; | ||
266 | |||
267 | aio_pid = err; | ||
268 | |||
269 | printk("Using 2.6 host AIO\n"); | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, | ||
274 | unsigned long long offset, struct aio_context *aio) | ||
275 | { | ||
276 | struct aio_thread_reply reply; | ||
277 | int err; | ||
278 | |||
279 | err = do_aio(ctx, type, io_fd, buf, len, offset, aio); | ||
280 | if(err){ | ||
281 | reply = ((struct aio_thread_reply) { .data = aio, | ||
282 | .err = err }); | ||
283 | err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); | ||
284 | if(err != sizeof(reply)) | ||
285 | printk("submit_aio_26 - write failed, " | ||
286 | "fd = %d, err = %d\n", aio->reply_fd, -err); | ||
287 | else err = 0; | ||
288 | } | ||
289 | |||
290 | return err; | ||
291 | } | ||
292 | |||
293 | #else | ||
294 | #define DEFAULT_24_AIO 1 | ||
/*
 * Built without HAVE_AIO_ABI: 2.6 host AIO cannot be initialized, so
 * always report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
299 | |||
300 | static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, | ||
301 | unsigned long long offset, struct aio_context *aio) | ||
302 | { | ||
303 | return -ENOSYS; | ||
304 | } | ||
305 | #endif | ||
306 | |||
307 | static int aio_24 = DEFAULT_24_AIO; | ||
308 | |||
309 | static int __init set_aio_24(char *name, int *add) | ||
310 | { | ||
311 | aio_24 = 1; | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | __uml_setup("aio=2.4", set_aio_24, | ||
316 | "aio=2.4\n" | ||
317 | " This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" | ||
318 | " available. 2.4 AIO is a single thread that handles one request at a\n" | ||
319 | " time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" | ||
320 | " interface to handle an arbitrary number of pending requests. 2.6 AIO \n" | ||
321 | " is not available in tt mode, on 2.4 hosts, or when UML is built with\n" | ||
322 | " /usr/include/linux/aio_abi.h not available. Many distributions don't\n" | ||
323 | " include aio_abi.h, so you will need to copy it from a kernel tree to\n" | ||
324 | " your /usr/include/linux in order to build an AIO-capable UML\n\n" | ||
325 | ); | ||
326 | |||
327 | static int init_aio(void) | ||
328 | { | ||
329 | int err; | ||
330 | |||
331 | CHOOSE_MODE(({ | ||
332 | if(!aio_24){ | ||
333 | printk("Disabling 2.6 AIO in tt mode\n"); | ||
334 | aio_24 = 1; | ||
335 | } }), (void) 0); | ||
336 | |||
337 | if(!aio_24){ | ||
338 | err = init_aio_26(); | ||
339 | if(err && (errno == ENOSYS)){ | ||
340 | printk("2.6 AIO not supported on the host - " | ||
341 | "reverting to 2.4 AIO\n"); | ||
342 | aio_24 = 1; | ||
343 | } | ||
344 | else return err; | ||
345 | } | ||
346 | |||
347 | if(aio_24) | ||
348 | return init_aio_24(); | ||
349 | |||
350 | return 0; | ||
351 | } | ||
352 | |||
353 | /* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio | ||
354 | * needs to be called when the kernel is running because it calls run_helper, | ||
355 | * which needs get_free_page. exit_aio is a __uml_exitcall because the generic | ||
356 | * kernel does not run __exitcalls on shutdown, and can't because many of them | ||
357 | * break when called outside of module unloading. | ||
358 | */ | ||
359 | __initcall(init_aio); | ||
360 | |||
361 | static void exit_aio(void) | ||
362 | { | ||
363 | if(aio_pid != -1) | ||
364 | os_kill_process(aio_pid, 1); | ||
365 | } | ||
366 | |||
367 | __uml_exitcall(exit_aio); | ||
368 | |||
369 | static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, | ||
370 | unsigned long long offset, struct aio_context *aio) | ||
371 | { | ||
372 | struct aio_thread_req req = { .type = type, | ||
373 | .io_fd = io_fd, | ||
374 | .offset = offset, | ||
375 | .buf = buf, | ||
376 | .len = len, | ||
377 | .aio = aio, | ||
378 | }; | ||
379 | int err; | ||
380 | |||
381 | err = os_write_file(aio_req_fd_w, &req, sizeof(req)); | ||
382 | if(err == sizeof(req)) | ||
383 | err = 0; | ||
384 | |||
385 | return err; | ||
386 | } | ||
387 | |||
388 | int submit_aio(enum aio_type type, int io_fd, char *buf, int len, | ||
389 | unsigned long long offset, int reply_fd, | ||
390 | struct aio_context *aio) | ||
391 | { | ||
392 | aio->reply_fd = reply_fd; | ||
393 | if(aio_24) | ||
394 | return submit_aio_24(type, io_fd, buf, len, offset, aio); | ||
395 | else { | ||
396 | return submit_aio_26(type, io_fd, buf, len, offset, aio); | ||
397 | } | ||
398 | } | ||