diff options
Diffstat (limited to 'arch/um/os-Linux/aio.c')
-rw-r--r-- | arch/um/os-Linux/aio.c | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c new file mode 100644 index 000000000000..b04897cd995d --- /dev/null +++ b/arch/um/os-Linux/aio.c | |||
@@ -0,0 +1,414 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) | ||
3 | * Licensed under the GPL | ||
4 | */ | ||
5 | |||
6 | #include <stdlib.h> | ||
7 | #include <unistd.h> | ||
8 | #include <signal.h> | ||
9 | #include <string.h> | ||
10 | #include <errno.h> | ||
11 | #include <sched.h> | ||
12 | #include <sys/syscall.h> | ||
13 | #include "os.h" | ||
14 | #include "helper.h" | ||
15 | #include "aio.h" | ||
16 | #include "init.h" | ||
17 | #include "user.h" | ||
18 | #include "mode.h" | ||
19 | |||
/* Request channel of the 2.4-style I/O thread: not_aio_thread reads request
 * pointers from aio_req_fd_r and submit_aio_24 writes them to aio_req_fd_w.
 * Both stay at -1 until init_aio_24 has set the channel up.
 */
static int aio_req_fd_r = -1,
	   aio_req_fd_w = -1;
22 | |||
23 | static int update_aio(struct aio_context *aio, int res) | ||
24 | { | ||
25 | if(res < 0) | ||
26 | aio->len = res; | ||
27 | else if((res == 0) && (aio->type == AIO_READ)){ | ||
28 | /* This is the EOF case - we have hit the end of the file | ||
29 | * and it ends in a partial block, so we fill the end of | ||
30 | * the block with zeros and claim success. | ||
31 | */ | ||
32 | memset(aio->data, 0, aio->len); | ||
33 | aio->len = 0; | ||
34 | } | ||
35 | else if(res > 0){ | ||
36 | aio->len -= res; | ||
37 | aio->data += res; | ||
38 | aio->offset += res; | ||
39 | return aio->len; | ||
40 | } | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | #if defined(HAVE_AIO_ABI) | ||
46 | #include <linux/aio_abi.h> | ||
47 | |||
48 | /* If we have the headers, we are going to build with AIO enabled. | ||
49 | * If we don't have aio in libc, we define the necessary stubs here. | ||
50 | */ | ||
51 | |||
52 | #if !defined(HAVE_AIO_LIBC) | ||
53 | |||
54 | static long io_setup(int n, aio_context_t *ctxp) | ||
55 | { | ||
56 | return syscall(__NR_io_setup, n, ctxp); | ||
57 | } | ||
58 | |||
59 | static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) | ||
60 | { | ||
61 | return syscall(__NR_io_submit, ctx, nr, iocbpp); | ||
62 | } | ||
63 | |||
64 | static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, | ||
65 | struct io_event *events, struct timespec *timeout) | ||
66 | { | ||
67 | return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); | ||
68 | } | ||
69 | |||
70 | #endif | ||
71 | |||
72 | /* The AIO_MMAP cases force the mmapped page into memory here | ||
73 | * rather than in whatever place first touches the data. I used | ||
74 | * to do this by touching the page, but that's delicate because | ||
75 | * gcc is prone to optimizing that away. So, what's done here | ||
76 | * is we read from the descriptor from which the page was | ||
77 | * mapped. The caller is required to pass an offset which is | ||
78 | * inside the page that was mapped. Thus, when the read | ||
79 | * returns, we know that the page is in the page cache, and | ||
80 | * that it now backs the mmapped area. | ||
81 | */ | ||
82 | |||
83 | static int do_aio(aio_context_t ctx, struct aio_context *aio) | ||
84 | { | ||
85 | struct iocb iocb, *iocbp = &iocb; | ||
86 | char c; | ||
87 | int err; | ||
88 | |||
89 | iocb = ((struct iocb) { .aio_data = (unsigned long) aio, | ||
90 | .aio_reqprio = 0, | ||
91 | .aio_fildes = aio->fd, | ||
92 | .aio_buf = (unsigned long) aio->data, | ||
93 | .aio_nbytes = aio->len, | ||
94 | .aio_offset = aio->offset, | ||
95 | .aio_reserved1 = 0, | ||
96 | .aio_reserved2 = 0, | ||
97 | .aio_reserved3 = 0 }); | ||
98 | |||
99 | switch(aio->type){ | ||
100 | case AIO_READ: | ||
101 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | ||
102 | break; | ||
103 | case AIO_WRITE: | ||
104 | iocb.aio_lio_opcode = IOCB_CMD_PWRITE; | ||
105 | break; | ||
106 | case AIO_MMAP: | ||
107 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | ||
108 | iocb.aio_buf = (unsigned long) &c; | ||
109 | iocb.aio_nbytes = sizeof(c); | ||
110 | break; | ||
111 | default: | ||
112 | printk("Bogus op in do_aio - %d\n", aio->type); | ||
113 | err = -EINVAL; | ||
114 | goto out; | ||
115 | } | ||
116 | |||
117 | err = io_submit(ctx, 1, &iocbp); | ||
118 | if(err > 0) | ||
119 | err = 0; | ||
120 | |||
121 | out: | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | static aio_context_t ctx = 0; | ||
126 | |||
127 | static int aio_thread(void *arg) | ||
128 | { | ||
129 | struct aio_thread_reply reply; | ||
130 | struct aio_context *aio; | ||
131 | struct io_event event; | ||
132 | int err, n; | ||
133 | |||
134 | signal(SIGWINCH, SIG_IGN); | ||
135 | |||
136 | while(1){ | ||
137 | n = io_getevents(ctx, 1, 1, &event, NULL); | ||
138 | if(n < 0){ | ||
139 | if(errno == EINTR) | ||
140 | continue; | ||
141 | printk("aio_thread - io_getevents failed, " | ||
142 | "errno = %d\n", errno); | ||
143 | } | ||
144 | else { | ||
145 | aio = (struct aio_context *) event.data; | ||
146 | if(update_aio(aio, event.res)){ | ||
147 | do_aio(ctx, aio); | ||
148 | continue; | ||
149 | } | ||
150 | |||
151 | reply = ((struct aio_thread_reply) | ||
152 | { .data = aio, | ||
153 | .err = aio->len }); | ||
154 | err = os_write_file(aio->reply_fd, &reply, | ||
155 | sizeof(reply)); | ||
156 | if(err != sizeof(reply)) | ||
157 | printk("aio_thread - write failed, " | ||
158 | "fd = %d, err = %d\n", aio->reply_fd, | ||
159 | -err); | ||
160 | } | ||
161 | } | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | #endif | ||
166 | |||
167 | static int do_not_aio(struct aio_context *aio) | ||
168 | { | ||
169 | char c; | ||
170 | int err; | ||
171 | |||
172 | switch(aio->type){ | ||
173 | case AIO_READ: | ||
174 | err = os_seek_file(aio->fd, aio->offset); | ||
175 | if(err) | ||
176 | goto out; | ||
177 | |||
178 | err = os_read_file(aio->fd, aio->data, aio->len); | ||
179 | break; | ||
180 | case AIO_WRITE: | ||
181 | err = os_seek_file(aio->fd, aio->offset); | ||
182 | if(err) | ||
183 | goto out; | ||
184 | |||
185 | err = os_write_file(aio->fd, aio->data, aio->len); | ||
186 | break; | ||
187 | case AIO_MMAP: | ||
188 | err = os_seek_file(aio->fd, aio->offset); | ||
189 | if(err) | ||
190 | goto out; | ||
191 | |||
192 | err = os_read_file(aio->fd, &c, sizeof(c)); | ||
193 | break; | ||
194 | default: | ||
195 | printk("do_not_aio - bad request type : %d\n", aio->type); | ||
196 | err = -EINVAL; | ||
197 | break; | ||
198 | } | ||
199 | |||
200 | out: | ||
201 | return err; | ||
202 | } | ||
203 | |||
204 | static int not_aio_thread(void *arg) | ||
205 | { | ||
206 | struct aio_context *aio; | ||
207 | struct aio_thread_reply reply; | ||
208 | int err; | ||
209 | |||
210 | signal(SIGWINCH, SIG_IGN); | ||
211 | while(1){ | ||
212 | err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); | ||
213 | if(err != sizeof(aio)){ | ||
214 | if(err < 0) | ||
215 | printk("not_aio_thread - read failed, " | ||
216 | "fd = %d, err = %d\n", aio_req_fd_r, | ||
217 | -err); | ||
218 | else { | ||
219 | printk("not_aio_thread - short read, fd = %d, " | ||
220 | "length = %d\n", aio_req_fd_r, err); | ||
221 | } | ||
222 | continue; | ||
223 | } | ||
224 | again: | ||
225 | err = do_not_aio(aio); | ||
226 | |||
227 | if(update_aio(aio, err)) | ||
228 | goto again; | ||
229 | |||
230 | reply = ((struct aio_thread_reply) { .data = aio, | ||
231 | .err = aio->len }); | ||
232 | err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); | ||
233 | if(err != sizeof(reply)) | ||
234 | printk("not_aio_thread - write failed, fd = %d, " | ||
235 | "err = %d\n", aio_req_fd_r, -err); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | static int submit_aio_24(struct aio_context *aio) | ||
240 | { | ||
241 | int err; | ||
242 | |||
243 | err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); | ||
244 | if(err == sizeof(aio)) | ||
245 | err = 0; | ||
246 | |||
247 | return err; | ||
248 | } | ||
249 | |||
/* Pid of the helper thread, used by exit_aio to kill it; -1 while no helper
 * has been started.
 */
static int aio_pid = -1;

/* Submission routine selected during initialisation; submit_aio calls
 * through it.
 */
static int (*submit_proc)(struct aio_context *aio);
252 | |||
253 | static int init_aio_24(void) | ||
254 | { | ||
255 | unsigned long stack; | ||
256 | int fds[2], err; | ||
257 | |||
258 | err = os_pipe(fds, 1, 1); | ||
259 | if(err) | ||
260 | goto out; | ||
261 | |||
262 | aio_req_fd_w = fds[0]; | ||
263 | aio_req_fd_r = fds[1]; | ||
264 | err = run_helper_thread(not_aio_thread, NULL, | ||
265 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | ||
266 | if(err < 0) | ||
267 | goto out_close_pipe; | ||
268 | |||
269 | aio_pid = err; | ||
270 | goto out; | ||
271 | |||
272 | out_close_pipe: | ||
273 | os_close_file(fds[0]); | ||
274 | os_close_file(fds[1]); | ||
275 | aio_req_fd_w = -1; | ||
276 | aio_req_fd_r = -1; | ||
277 | out: | ||
278 | #ifndef HAVE_AIO_ABI | ||
279 | printk("/usr/include/linux/aio_abi.h not present during build\n"); | ||
280 | #endif | ||
281 | printk("2.6 host AIO support not used - falling back to I/O " | ||
282 | "thread\n"); | ||
283 | |||
284 | submit_proc = submit_aio_24; | ||
285 | |||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | #ifdef HAVE_AIO_ABI | ||
290 | #define DEFAULT_24_AIO 0 | ||
291 | static int submit_aio_26(struct aio_context *aio) | ||
292 | { | ||
293 | struct aio_thread_reply reply; | ||
294 | int err; | ||
295 | |||
296 | err = do_aio(ctx, aio); | ||
297 | if(err){ | ||
298 | reply = ((struct aio_thread_reply) { .data = aio, | ||
299 | .err = err }); | ||
300 | err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); | ||
301 | if(err != sizeof(reply)) | ||
302 | printk("submit_aio_26 - write failed, " | ||
303 | "fd = %d, err = %d\n", aio->reply_fd, -err); | ||
304 | else err = 0; | ||
305 | } | ||
306 | |||
307 | return err; | ||
308 | } | ||
309 | |||
310 | static int init_aio_26(void) | ||
311 | { | ||
312 | unsigned long stack; | ||
313 | int err; | ||
314 | |||
315 | if(io_setup(256, &ctx)){ | ||
316 | printk("aio_thread failed to initialize context, err = %d\n", | ||
317 | errno); | ||
318 | return -errno; | ||
319 | } | ||
320 | |||
321 | err = run_helper_thread(aio_thread, NULL, | ||
322 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | ||
323 | if(err < 0) | ||
324 | return -errno; | ||
325 | |||
326 | aio_pid = err; | ||
327 | |||
328 | printk("Using 2.6 host AIO\n"); | ||
329 | |||
330 | submit_proc = submit_aio_26; | ||
331 | |||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | #else | ||
336 | #define DEFAULT_24_AIO 1 | ||
/* Built without aio_abi.h: there is no 2.6 AIO backend, so every submission
 * is refused with -ENOSYS.
 */
static int submit_aio_26(struct aio_context *aio)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
341 | |||
342 | static int init_aio_26(void) | ||
343 | { | ||
344 | submit_proc = submit_aio_26; | ||
345 | return -ENOSYS; | ||
346 | } | ||
347 | #endif | ||
348 | |||
349 | static int aio_24 = DEFAULT_24_AIO; | ||
350 | |||
351 | static int __init set_aio_24(char *name, int *add) | ||
352 | { | ||
353 | aio_24 = 1; | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | __uml_setup("aio=2.4", set_aio_24, | ||
358 | "aio=2.4\n" | ||
359 | " This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" | ||
360 | " available. 2.4 AIO is a single thread that handles one request at a\n" | ||
361 | " time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" | ||
362 | " interface to handle an arbitrary number of pending requests. 2.6 AIO \n" | ||
363 | " is not available in tt mode, on 2.4 hosts, or when UML is built with\n" | ||
364 | " /usr/include/linux/aio_abi.h not available. Many distributions don't\n" | ||
365 | " include aio_abi.h, so you will need to copy it from a kernel tree to\n" | ||
366 | " your /usr/include/linux in order to build an AIO-capable UML\n\n" | ||
367 | ); | ||
368 | |||
369 | static int init_aio(void) | ||
370 | { | ||
371 | int err; | ||
372 | |||
373 | CHOOSE_MODE(({ | ||
374 | if(!aio_24){ | ||
375 | printk("Disabling 2.6 AIO in tt mode\n"); | ||
376 | aio_24 = 1; | ||
377 | } }), (void) 0); | ||
378 | |||
379 | if(!aio_24){ | ||
380 | err = init_aio_26(); | ||
381 | if(err && (errno == ENOSYS)){ | ||
382 | printk("2.6 AIO not supported on the host - " | ||
383 | "reverting to 2.4 AIO\n"); | ||
384 | aio_24 = 1; | ||
385 | } | ||
386 | else return err; | ||
387 | } | ||
388 | |||
389 | if(aio_24) | ||
390 | return init_aio_24(); | ||
391 | |||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | /* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio | ||
396 | * needs to be called when the kernel is running because it calls run_helper, | ||
397 | * which needs get_free_page. exit_aio is a __uml_exitcall because the generic | ||
398 | * kernel does not run __exitcalls on shutdown, and can't because many of them | ||
399 | * break when called outside of module unloading. | ||
400 | */ | ||
401 | __initcall(init_aio); | ||
402 | |||
403 | static void exit_aio(void) | ||
404 | { | ||
405 | if(aio_pid != -1) | ||
406 | os_kill_process(aio_pid, 1); | ||
407 | } | ||
408 | |||
409 | __uml_exitcall(exit_aio); | ||
410 | |||
411 | int submit_aio(struct aio_context *aio) | ||
412 | { | ||
413 | return (*submit_proc)(aio); | ||
414 | } | ||