diff options
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 166 |
1 files changed, 118 insertions, 48 deletions
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/aio_abi.h> | 15 | #include <linux/aio_abi.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
18 | #include <linux/uio.h> | ||
18 | 19 | ||
19 | #define DEBUG 0 | 20 | #define DEBUG 0 |
20 | 21 | ||
@@ -414,6 +415,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) | |||
414 | req->ki_retry = NULL; | 415 | req->ki_retry = NULL; |
415 | req->ki_dtor = NULL; | 416 | req->ki_dtor = NULL; |
416 | req->private = NULL; | 417 | req->private = NULL; |
418 | req->ki_iovec = NULL; | ||
417 | INIT_LIST_HEAD(&req->ki_run_list); | 419 | INIT_LIST_HEAD(&req->ki_run_list); |
418 | 420 | ||
419 | /* Check if the completion queue has enough free space to | 421 | /* Check if the completion queue has enough free space to |
@@ -459,6 +461,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) | |||
459 | 461 | ||
460 | if (req->ki_dtor) | 462 | if (req->ki_dtor) |
461 | req->ki_dtor(req); | 463 | req->ki_dtor(req); |
464 | if (req->ki_iovec != &req->ki_inline_vec) | ||
465 | kfree(req->ki_iovec); | ||
462 | kmem_cache_free(kiocb_cachep, req); | 466 | kmem_cache_free(kiocb_cachep, req); |
463 | ctx->reqs_active--; | 467 | ctx->reqs_active--; |
464 | 468 | ||
@@ -1300,63 +1304,63 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) | |||
1300 | return -EINVAL; | 1304 | return -EINVAL; |
1301 | } | 1305 | } |
1302 | 1306 | ||
1303 | /* | 1307 | static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) |
1304 | * aio_p{read,write} are the default ki_retry methods for | ||
1305 | * IO_CMD_P{READ,WRITE}. They maintain kiocb retry state around potentially | ||
1306 | * multiple calls to f_op->aio_read(). They loop around partial progress | ||
1307 | * instead of returning -EIOCBRETRY because they don't have the means to call | ||
1308 | * kick_iocb(). | ||
1309 | */ | ||
1310 | static ssize_t aio_pread(struct kiocb *iocb) | ||
1311 | { | 1308 | { |
1312 | struct file *file = iocb->ki_filp; | 1309 | struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg]; |
1313 | struct address_space *mapping = file->f_mapping; | 1310 | |
1314 | struct inode *inode = mapping->host; | 1311 | BUG_ON(ret <= 0); |
1315 | ssize_t ret = 0; | 1312 | |
1316 | 1313 | while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) { | |
1317 | do { | 1314 | ssize_t this = min((ssize_t)iov->iov_len, ret); |
1318 | ret = file->f_op->aio_read(iocb, iocb->ki_buf, | 1315 | iov->iov_base += this; |
1319 | iocb->ki_left, iocb->ki_pos); | 1316 | iov->iov_len -= this; |
1320 | /* | 1317 | iocb->ki_left -= this; |
1321 | * Can't just depend on iocb->ki_left to determine | 1318 | ret -= this; |
1322 | * whether we are done. This may have been a short read. | 1319 | if (iov->iov_len == 0) { |
1323 | */ | 1320 | iocb->ki_cur_seg++; |
1324 | if (ret > 0) { | 1321 | iov++; |
1325 | iocb->ki_buf += ret; | ||
1326 | iocb->ki_left -= ret; | ||
1327 | } | 1322 | } |
1323 | } | ||
1328 | 1324 | ||
1329 | /* | 1325 | /* the caller should not have done more io than what fit in |
1330 | * For pipes and sockets we return once we have some data; for | 1326 | * the remaining iovecs */ |
1331 | * regular files we retry till we complete the entire read or | 1327 | BUG_ON(ret > 0 && iocb->ki_left == 0); |
1332 | * find that we can't read any more data (e.g short reads). | ||
1333 | */ | ||
1334 | } while (ret > 0 && iocb->ki_left > 0 && | ||
1335 | !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); | ||
1336 | |||
1337 | /* This means we must have transferred all that we could */ | ||
1338 | /* No need to retry anymore */ | ||
1339 | if ((ret == 0) || (iocb->ki_left == 0)) | ||
1340 | ret = iocb->ki_nbytes - iocb->ki_left; | ||
1341 | |||
1342 | return ret; | ||
1343 | } | 1328 | } |
1344 | 1329 | ||
1345 | /* see aio_pread() */ | 1330 | static ssize_t aio_rw_vect_retry(struct kiocb *iocb) |
1346 | static ssize_t aio_pwrite(struct kiocb *iocb) | ||
1347 | { | 1331 | { |
1348 | struct file *file = iocb->ki_filp; | 1332 | struct file *file = iocb->ki_filp; |
1333 | struct address_space *mapping = file->f_mapping; | ||
1334 | struct inode *inode = mapping->host; | ||
1335 | ssize_t (*rw_op)(struct kiocb *, const struct iovec *, | ||
1336 | unsigned long, loff_t); | ||
1349 | ssize_t ret = 0; | 1337 | ssize_t ret = 0; |
1338 | unsigned short opcode; | ||
1339 | |||
1340 | if ((iocb->ki_opcode == IOCB_CMD_PREADV) || | ||
1341 | (iocb->ki_opcode == IOCB_CMD_PREAD)) { | ||
1342 | rw_op = file->f_op->aio_read; | ||
1343 | opcode = IOCB_CMD_PREADV; | ||
1344 | } else { | ||
1345 | rw_op = file->f_op->aio_write; | ||
1346 | opcode = IOCB_CMD_PWRITEV; | ||
1347 | } | ||
1350 | 1348 | ||
1351 | do { | 1349 | do { |
1352 | ret = file->f_op->aio_write(iocb, iocb->ki_buf, | 1350 | ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], |
1353 | iocb->ki_left, iocb->ki_pos); | 1351 | iocb->ki_nr_segs - iocb->ki_cur_seg, |
1354 | if (ret > 0) { | 1352 | iocb->ki_pos); |
1355 | iocb->ki_buf += ret; | 1353 | if (ret > 0) |
1356 | iocb->ki_left -= ret; | 1354 | aio_advance_iovec(iocb, ret); |
1357 | } | 1355 | |
1358 | } while (ret > 0 && iocb->ki_left > 0); | 1356 | /* retry all partial writes. retry partial reads as long as it's a |
1357 | * regular file. */ | ||
1358 | } while (ret > 0 && iocb->ki_left > 0 && | ||
1359 | (opcode == IOCB_CMD_PWRITEV || | ||
1360 | (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); | ||
1359 | 1361 | ||
1362 | /* This means we must have transferred all that we could */ | ||
1363 | /* No need to retry anymore */ | ||
1360 | if ((ret == 0) || (iocb->ki_left == 0)) | 1364 | if ((ret == 0) || (iocb->ki_left == 0)) |
1361 | ret = iocb->ki_nbytes - iocb->ki_left; | 1365 | ret = iocb->ki_nbytes - iocb->ki_left; |
1362 | 1366 | ||
@@ -1383,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *iocb) | |||
1383 | return ret; | 1387 | return ret; |
1384 | } | 1388 | } |
1385 | 1389 | ||
1390 | static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb) | ||
1391 | { | ||
1392 | ssize_t ret; | ||
1393 | |||
1394 | ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf, | ||
1395 | kiocb->ki_nbytes, 1, | ||
1396 | &kiocb->ki_inline_vec, &kiocb->ki_iovec); | ||
1397 | if (ret < 0) | ||
1398 | goto out; | ||
1399 | |||
1400 | kiocb->ki_nr_segs = kiocb->ki_nbytes; | ||
1401 | kiocb->ki_cur_seg = 0; | ||
1402 | /* ki_nbytes/left now reflect bytes instead of segs */ | ||
1403 | kiocb->ki_nbytes = ret; | ||
1404 | kiocb->ki_left = ret; | ||
1405 | |||
1406 | ret = 0; | ||
1407 | out: | ||
1408 | return ret; | ||
1409 | } | ||
1410 | |||
1411 | static ssize_t aio_setup_single_vector(struct kiocb *kiocb) | ||
1412 | { | ||
1413 | kiocb->ki_iovec = &kiocb->ki_inline_vec; | ||
1414 | kiocb->ki_iovec->iov_base = kiocb->ki_buf; | ||
1415 | kiocb->ki_iovec->iov_len = kiocb->ki_left; | ||
1416 | kiocb->ki_nr_segs = 1; | ||
1417 | kiocb->ki_cur_seg = 0; | ||
1418 | kiocb->ki_nbytes = kiocb->ki_left; | ||
1419 | return 0; | ||
1420 | } | ||
1421 | |||
1386 | /* | 1422 | /* |
1387 | * aio_setup_iocb: | 1423 | * aio_setup_iocb: |
1388 | * Performs the initial checks and aio retry method | 1424 | * Performs the initial checks and aio retry method |
@@ -1405,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) | |||
1405 | ret = security_file_permission(file, MAY_READ); | 1441 | ret = security_file_permission(file, MAY_READ); |
1406 | if (unlikely(ret)) | 1442 | if (unlikely(ret)) |
1407 | break; | 1443 | break; |
1444 | ret = aio_setup_single_vector(kiocb); | ||
1445 | if (ret) | ||
1446 | break; | ||
1408 | ret = -EINVAL; | 1447 | ret = -EINVAL; |
1409 | if (file->f_op->aio_read) | 1448 | if (file->f_op->aio_read) |
1410 | kiocb->ki_retry = aio_pread; | 1449 | kiocb->ki_retry = aio_rw_vect_retry; |
1411 | break; | 1450 | break; |
1412 | case IOCB_CMD_PWRITE: | 1451 | case IOCB_CMD_PWRITE: |
1413 | ret = -EBADF; | 1452 | ret = -EBADF; |
@@ -1420,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) | |||
1420 | ret = security_file_permission(file, MAY_WRITE); | 1459 | ret = security_file_permission(file, MAY_WRITE); |
1421 | if (unlikely(ret)) | 1460 | if (unlikely(ret)) |
1422 | break; | 1461 | break; |
1462 | ret = aio_setup_single_vector(kiocb); | ||
1463 | if (ret) | ||
1464 | break; | ||
1465 | ret = -EINVAL; | ||
1466 | if (file->f_op->aio_write) | ||
1467 | kiocb->ki_retry = aio_rw_vect_retry; | ||
1468 | break; | ||
1469 | case IOCB_CMD_PREADV: | ||
1470 | ret = -EBADF; | ||
1471 | if (unlikely(!(file->f_mode & FMODE_READ))) | ||
1472 | break; | ||
1473 | ret = security_file_permission(file, MAY_READ); | ||
1474 | if (unlikely(ret)) | ||
1475 | break; | ||
1476 | ret = aio_setup_vectored_rw(READ, kiocb); | ||
1477 | if (ret) | ||
1478 | break; | ||
1479 | ret = -EINVAL; | ||
1480 | if (file->f_op->aio_read) | ||
1481 | kiocb->ki_retry = aio_rw_vect_retry; | ||
1482 | break; | ||
1483 | case IOCB_CMD_PWRITEV: | ||
1484 | ret = -EBADF; | ||
1485 | if (unlikely(!(file->f_mode & FMODE_WRITE))) | ||
1486 | break; | ||
1487 | ret = security_file_permission(file, MAY_WRITE); | ||
1488 | if (unlikely(ret)) | ||
1489 | break; | ||
1490 | ret = aio_setup_vectored_rw(WRITE, kiocb); | ||
1491 | if (ret) | ||
1492 | break; | ||
1423 | ret = -EINVAL; | 1493 | ret = -EINVAL; |
1424 | if (file->f_op->aio_write) | 1494 | if (file->f_op->aio_write) |
1425 | kiocb->ki_retry = aio_pwrite; | 1495 | kiocb->ki_retry = aio_rw_vect_retry; |
1426 | break; | 1496 | break; |
1427 | case IOCB_CMD_FDSYNC: | 1497 | case IOCB_CMD_FDSYNC: |
1428 | ret = -EINVAL; | 1498 | ret = -EINVAL; |