 fs/xfs/linux-2.6/xfs_aops.c | 158 ++++++++++++++++++++-----------------------
 1 file changed, 76 insertions(+), 82 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 13622d5ba068..d24e78f32f3e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -202,23 +202,17 @@ xfs_setfilesize(
 }
 
 /*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
+ * Schedule IO completion handling on the final put of an ioend.
  */
 STATIC void
 xfs_finish_ioend(
-	xfs_ioend_t		*ioend,
-	int			wait)
+	struct xfs_ioend	*ioend)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		struct workqueue_struct *wq;
-
-		wq = (ioend->io_type == IO_UNWRITTEN) ?
-			xfsconvertd_workqueue : xfsdatad_workqueue;
-		queue_work(wq, &ioend->io_work);
-		if (wait)
-			flush_workqueue(wq);
+		if (ioend->io_type == IO_UNWRITTEN)
+			queue_work(xfsconvertd_workqueue, &ioend->io_work);
+		else
+			queue_work(xfsdatad_workqueue, &ioend->io_work);
 	}
 }
 
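The reworked xfs_finish_ioend drops the wait flag and becomes a pure put operation: whoever drops the last reference queues the completion work on the workqueue matching the ioend type. Below is a minimal userspace sketch of this put-and-dispatch pattern, using C11 atomics in place of the kernel's atomic_t and direct calls in place of queue_work; all names here are illustrative, not taken from the patch.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for the kernel's ioend: a refcounted completion. */
struct completion_obj {
	atomic_int remaining;   /* plays the role of ioend->io_remaining */
	int        unwritten;   /* plays the role of io_type == IO_UNWRITTEN */
};

/* Stand-ins for queue_work() on the two workqueues. */
static void convertd_work(struct completion_obj *c) { puts("unwritten conversion"); free(c); }
static void datad_work(struct completion_obj *c) { puts("data completion"); free(c); }

/* The pattern from the hunk above: the final put dispatches completion. */
static void finish(struct completion_obj *c)
{
	if (atomic_fetch_sub(&c->remaining, 1) == 1) {  /* like atomic_dec_and_test() */
		if (c->unwritten)
			convertd_work(c);
		else
			datad_work(c);
	}
}

int main(void)
{
	struct completion_obj *c = malloc(sizeof(*c));
	atomic_init(&c->remaining, 2);  /* e.g. two bios still in flight */
	c->unwritten = 1;
	finish(c);  /* first completion: refcount drops to 1, no dispatch */
	finish(c);  /* final put: completion work is dispatched */
	return 0;
}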
@@ -262,7 +256,7 @@ xfs_end_io(
 	 */
 	if (error == EAGAIN) {
 		atomic_inc(&ioend->io_remaining);
-		xfs_finish_ioend(ioend, 0);
+		xfs_finish_ioend(ioend);
 		/* ensure we don't spin on blocked ioends */
 		delay(1);
 	} else {
@@ -273,6 +267,17 @@ xfs_end_io(
 }
 
 /*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+	struct xfs_ioend	*ioend)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining))
+		xfs_end_io(&ioend->io_work);
+}
+
+/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
  * We'll need to extend this for updating the ondisk inode size later
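xfs_finish_ioend_sync is the synchronous counterpart added here: the final put invokes xfs_end_io directly in the caller's context instead of queueing it, which is only safe where the caller is allowed to block and start transactions. Continuing the hypothetical userspace sketch from the first hunk, the sync variant differs only in running the work in place:

/* Sync variant of finish() above: the final put runs the completion
 * handler in the caller's context instead of deferring it.  Only safe
 * where the caller may block. */
static void finish_sync(struct completion_obj *c)
{
	if (atomic_fetch_sub(&c->remaining, 1) == 1) {
		if (c->unwritten)
			convertd_work(c);
		else
			datad_work(c);
	}
}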
@@ -353,7 +358,7 @@ xfs_end_bio(
 	bio->bi_end_io = NULL;
 	bio_put(bio);
 
-	xfs_finish_ioend(ioend, 0);
+	xfs_finish_ioend(ioend);
 }
 
 STATIC void
@@ -495,7 +500,7 @@ xfs_submit_ioend(
 		}
 		if (bio)
 			xfs_submit_ioend_bio(wbc, ioend, bio);
-		xfs_finish_ioend(ioend, 0);
+		xfs_finish_ioend(ioend);
 	} while ((ioend = next) != NULL);
 }
 
@@ -1406,70 +1411,56 @@ xfs_get_blocks_direct(
 	return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
 }
 
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents.  In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done.  But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions.  In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
 STATIC void
-xfs_end_io_direct(
+xfs_end_io_direct_write(
 	struct kiocb		*iocb,
 	loff_t			offset,
 	ssize_t			size,
 	void			*private,
 	int			ret,
 	bool			is_async)
 {
-	xfs_ioend_t		*ioend = iocb->private;
-	bool			complete_aio = is_async;
+	struct xfs_ioend	*ioend = iocb->private;
 
 	/*
-	 * Non-NULL private data means we need to issue a transaction to
-	 * convert a range from unwritten to written extents.  This needs
-	 * to happen from process context but aio+dio I/O completion
-	 * happens from irq context so we need to defer it to a workqueue.
-	 * This is not necessary for synchronous direct I/O, but we do
-	 * it anyway to keep the code uniform and simpler.
-	 *
-	 * Well, if only it were that simple.  Because synchronous direct I/O
-	 * requires extent conversion to occur *before* we return to userspace,
-	 * we have to wait for extent conversion to complete.  Look at the
-	 * iocb that has been passed to us to determine if this is AIO or
-	 * not.  If it is synchronous, tell xfs_finish_ioend() to kick the
-	 * workqueue and wait for it to complete.
-	 *
-	 * The core direct I/O code might be changed to always call the
-	 * completion handler in the future, in which case all this can
-	 * go away.
+	 * blockdev_direct_IO can return an error even after the I/O
+	 * completion handler was called.  Thus we need to protect
+	 * against double-freeing.
 	 */
+	iocb->private = NULL;
+
 	ioend->io_offset = offset;
 	ioend->io_size = size;
-	if (ioend->io_type == IO_READ) {
-		xfs_finish_ioend(ioend, 0);
-	} else if (private && size > 0) {
-		if (is_async) {
+	if (private && size > 0)
+		ioend->io_type = IO_UNWRITTEN;
+
+	if (is_async) {
+		/*
+		 * If we are converting an unwritten extent we need to delay
+		 * the AIO completion until after the unwritten extent
+		 * conversion has completed, otherwise do it ASAP.
+		 */
+		if (ioend->io_type == IO_UNWRITTEN) {
 			ioend->io_iocb = iocb;
 			ioend->io_result = ret;
-			complete_aio = false;
-			xfs_finish_ioend(ioend, 0);
 		} else {
-			xfs_finish_ioend(ioend, 1);
+			aio_complete(iocb, ret, 0);
 		}
+		xfs_finish_ioend(ioend);
 	} else {
-		/*
-		 * A direct I/O write ioend starts its life in unwritten
-		 * state in case it maps an unwritten extent.  This write
-		 * didn't map an unwritten extent so switch its completion
-		 * handler.
-		 */
-		ioend->io_type = IO_NEW;
-		xfs_finish_ioend(ioend, 0);
+		xfs_finish_ioend_sync(ioend);
 	}
-
-	/*
-	 * blockdev_direct_IO can return an error even after the I/O
-	 * completion handler was called.  Thus we need to protect
-	 * against double-freeing.
-	 */
-	iocb->private = NULL;
-
-	if (complete_aio)
-		aio_complete(iocb, ret, 0);
 }
 
 STATIC ssize_t
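Stripped of the kernel plumbing, the rewritten handler reduces to a three-way choice: synchronous I/O completes in the caller's context, async I/O that needs unwritten-extent conversion stashes the iocb so aio_complete runs only after conversion, and async I/O that needs no conversion completes immediately before the final put. A hypothetical condensation of that decision tree, not code from the patch:

/* Hypothetical restatement of xfs_end_io_direct_write's dispatch logic. */
enum dio_completion_path {
	RUN_SYNC,                  /* xfs_finish_ioend_sync(): caller context */
	DEFER_AIO_UNTIL_CONVERTED, /* stash iocb; workqueue calls aio_complete */
	COMPLETE_AIO_THEN_PUT,     /* aio_complete() now, then the final put */
};

static enum dio_completion_path choose_path(int is_async, int needs_conversion)
{
	if (!is_async)
		return RUN_SYNC;
	if (needs_conversion)
		return DEFER_AIO_UNTIL_CONVERTED;
	return COMPLETE_AIO_THEN_PUT;
}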
@@ -1480,23 +1471,26 @@ xfs_vm_direct_IO(
 	loff_t			offset,
 	unsigned long		nr_segs)
 {
-	struct file		*file = iocb->ki_filp;
-	struct inode		*inode = file->f_mapping->host;
-	struct block_device	*bdev;
-	ssize_t			ret;
-
-	bdev = xfs_find_bdev_for_inode(inode);
-
-	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
-					IO_UNWRITTEN : IO_READ);
-
-	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
-					    xfs_get_blocks_direct,
-					    xfs_end_io_direct);
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
+	ssize_t			ret;
+
+	if (rw & WRITE) {
+		iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+
+		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+						    offset, nr_segs,
+						    xfs_get_blocks_direct,
+						    xfs_end_io_direct_write);
+		if (ret != -EIOCBQUEUED && iocb->private)
+			xfs_destroy_ioend(iocb->private);
+	} else {
+		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+						    offset, nr_segs,
+						    xfs_get_blocks_direct,
+						    NULL);
+	}
 
-	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
-		xfs_destroy_ioend(iocb->private);
 	return ret;
 }
 
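With the completion handler now handling writes only, xfs_vm_direct_IO splits cleanly: the write path allocates a private ioend and registers xfs_end_io_direct_write, while the read path passes a NULL completion handler and carries no private state. A schematic of that dispatch with placeholder types; do_direct_io and the surrounding names are invented for illustration:

#include <stddef.h>

/* Placeholder request type standing in for the kernel's kiocb. */
struct dio_req { void *private; };
typedef void (*dio_end_io_fn)(struct dio_req *req, int ret);

static void write_end_io(struct dio_req *req, int ret)
{
	/* a real handler would convert unwritten extents, update sizes, ... */
	(void)req; (void)ret;
}

/* Stand-in for blockdev_direct_IO_no_locking(). */
static int do_direct_io(struct dio_req *req, dio_end_io_fn end_io)
{
	(void)req; (void)end_io;
	return 0;
}

/* Only writes need per-request completion state and an end_io handler. */
static int direct_io(struct dio_req *req, int is_write, void *write_state)
{
	if (is_write) {
		req->private = write_state;  /* like xfs_alloc_ioend(inode, IO_NEW) */
		return do_direct_io(req, write_end_io);
	}
	return do_direct_io(req, NULL);
}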
