diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 84 |
1 files changed, 38 insertions, 46 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 427f4690ad6d..28f116bdc405 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3468,7 +3468,7 @@ out: | |||
3468 | return ret; | 3468 | return ret; |
3469 | } | 3469 | } |
3470 | 3470 | ||
3471 | static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | 3471 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3472 | struct buffer_head *bh_result, int create) | 3472 | struct buffer_head *bh_result, int create) |
3473 | { | 3473 | { |
3474 | handle_t *handle = NULL; | 3474 | handle_t *handle = NULL; |
@@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | |||
3476 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3476 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
3477 | int dio_credits; | 3477 | int dio_credits; |
3478 | 3478 | ||
3479 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | 3479 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3480 | inode->i_ino, create); | 3480 | inode->i_ino, create); |
3481 | /* | 3481 | /* |
3482 | * DIO VFS code passes create = 0 flag for write to | 3482 | * ext4_get_block in preparation for a DIO write or buffered write. |
3483 | * the middle of file. It does this to avoid block | 3483 | * We allocate an uninitialized extent if blocks haven't been allocated. |
3484 | * allocation for holes, to prevent expose stale data | 3484 | * The extent will be converted to initialized after the IO completes. |
3485 | * out when there is parallel buffered read (which does | ||
3486 | * not hold the i_mutex lock) while direct IO write has | ||
3487 | * not completed. DIO request on holes finally falls back | ||
3488 | * to buffered IO for this reason. | ||
3489 | * | ||
3490 | * For ext4 extent based file, since we support fallocate, | ||
3491 | * new allocated extent as uninitialized, for holes, we | ||
3492 | * could fallocate blocks for holes, thus parallel | ||
3493 | * buffered IO read will zero out the page when read on | ||
3494 | * a hole while parallel DIO write to the hole has not completed. | ||
3495 | * | ||
3496 | * when we come here, we know it's a direct IO write to | ||
3497 | * to the middle of file (<i_size) | ||
3498 | * so it's safe to override the create flag from VFS. | ||
3499 | */ | 3485 | */ |
3500 | create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; | 3486 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; |
3501 | 3487 | ||
3502 | if (max_blocks > DIO_MAX_BLOCKS) | 3488 | if (max_blocks > DIO_MAX_BLOCKS) |
3503 | max_blocks = DIO_MAX_BLOCKS; | 3489 | max_blocks = DIO_MAX_BLOCKS; |
@@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io) | |||
3524 | iput(io->inode); | 3510 | iput(io->inode); |
3525 | kfree(io); | 3511 | kfree(io); |
3526 | } | 3512 | } |
3527 | static void dump_aio_dio_list(struct inode * inode) | 3513 | |
3514 | static void dump_completed_IO(struct inode * inode) | ||
3528 | { | 3515 | { |
3529 | #ifdef EXT4_DEBUG | 3516 | #ifdef EXT4_DEBUG |
3530 | struct list_head *cur, *before, *after; | 3517 | struct list_head *cur, *before, *after; |
3531 | ext4_io_end_t *io, *io0, *io1; | 3518 | ext4_io_end_t *io, *io0, *io1; |
3532 | 3519 | ||
3533 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3520 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ |
3534 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | 3521 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); |
3535 | return; | 3522 | return; |
3536 | } | 3523 | } |
3537 | 3524 | ||
3538 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | 3525 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); |
3539 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | 3526 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ |
3540 | cur = &io->list; | 3527 | cur = &io->list; |
3541 | before = cur->prev; | 3528 | before = cur->prev; |
3542 | io0 = container_of(before, ext4_io_end_t, list); | 3529 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode) | |||
3552 | /* | 3539 | /* |
3553 | * check a range of space and convert unwritten extents to written. | 3540 | * check a range of space and convert unwritten extents to written. |
3554 | */ | 3541 | */ |
3555 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | 3542 | static int ext4_end_io_nolock(ext4_io_end_t *io) |
3556 | { | 3543 | { |
3557 | struct inode *inode = io->inode; | 3544 | struct inode *inode = io->inode; |
3558 | loff_t offset = io->offset; | 3545 | loff_t offset = io->offset; |
3559 | ssize_t size = io->size; | 3546 | ssize_t size = io->size; |
3560 | int ret = 0; | 3547 | int ret = 0; |
3561 | 3548 | ||
3562 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," | 3549 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
3563 | "list->prev 0x%p\n", | 3550 | "list->prev 0x%p\n", |
3564 | io, inode->i_ino, io->list.next, io->list.prev); | 3551 | io, inode->i_ino, io->list.next, io->list.prev); |
3565 | 3552 | ||
3566 | if (list_empty(&io->list)) | 3553 | if (list_empty(&io->list)) |
3567 | return ret; | 3554 | return ret; |
3568 | 3555 | ||
3569 | if (io->flag != DIO_AIO_UNWRITTEN) | 3556 | if (io->flag != EXT4_IO_UNWRITTEN) |
3570 | return ret; | 3557 | return ret; |
3571 | 3558 | ||
3572 | if (offset + size <= i_size_read(inode)) | 3559 | if (offset + size <= i_size_read(inode)) |
@@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | |||
3584 | io->flag = 0; | 3571 | io->flag = 0; |
3585 | return ret; | 3572 | return ret; |
3586 | } | 3573 | } |
3574 | |||
3587 | /* | 3575 | /* |
3588 | * work on completed aio dio IO, to convert unwritten extents to extents | 3576 | * work on completed aio dio IO, to convert unwritten extents to extents |
3589 | */ | 3577 | */ |
3590 | static void ext4_end_aio_dio_work(struct work_struct *work) | 3578 | static void ext4_end_io_work(struct work_struct *work) |
3591 | { | 3579 | { |
3592 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 3580 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); |
3593 | struct inode *inode = io->inode; | 3581 | struct inode *inode = io->inode; |
3594 | int ret = 0; | 3582 | int ret = 0; |
3595 | 3583 | ||
3596 | mutex_lock(&inode->i_mutex); | 3584 | mutex_lock(&inode->i_mutex); |
3597 | ret = ext4_end_aio_dio_nolock(io); | 3585 | ret = ext4_end_io_nolock(io); |
3598 | if (ret >= 0) { | 3586 | if (ret >= 0) { |
3599 | if (!list_empty(&io->list)) | 3587 | if (!list_empty(&io->list)) |
3600 | list_del_init(&io->list); | 3588 | list_del_init(&io->list); |
@@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work) | |||
3602 | } | 3590 | } |
3603 | mutex_unlock(&inode->i_mutex); | 3591 | mutex_unlock(&inode->i_mutex); |
3604 | } | 3592 | } |
3593 | |||
3605 | /* | 3594 | /* |
3606 | * This function is called from ext4_sync_file(). | 3595 | * This function is called from ext4_sync_file(). |
3607 | * | 3596 | * |
3608 | * When AIO DIO IO is completed, the work to convert unwritten | 3597 | * When IO is completed, the work to convert unwritten extents to |
3609 | * extents to written is queued on workqueue but may not get immediately | 3598 | * written is queued on workqueue but may not get immediately |
3610 | * scheduled. When fsync is called, we need to ensure the | 3599 | * scheduled. When fsync is called, we need to ensure the |
3611 | * conversion is complete before fsync returns. | 3600 | * conversion is complete before fsync returns. |
3612 | * The inode keeps track of a list of completed AIO from DIO path | 3601 | * The inode keeps track of a list of pending/completed IO that |
3613 | * that might needs to do the conversion. This function walks through | 3602 | * might need to do the conversion. This function walks through |
3614 | * the list and convert the related unwritten extents to written. | 3603 | * the list and converts the related unwritten extents for completed IO |
3604 | * to written. | ||
3605 | * The function returns the number of pending IOs on success. | ||
3615 | */ | 3606 | */ |
3616 | int flush_aio_dio_completed_IO(struct inode *inode) | 3607 | int flush_completed_IO(struct inode *inode) |
3617 | { | 3608 | { |
3618 | ext4_io_end_t *io; | 3609 | ext4_io_end_t *io; |
3619 | int ret = 0; | 3610 | int ret = 0; |
3620 | int ret2 = 0; | 3611 | int ret2 = 0; |
3621 | 3612 | ||
3622 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) | 3613 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) |
3623 | return ret; | 3614 | return ret; |
3624 | 3615 | ||
3625 | dump_aio_dio_list(inode); | 3616 | dump_completed_IO(inode); |
3626 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3617 | while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){ |
3627 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | 3618 | io = list_entry(EXT4_I(inode)->i_completed_io_list.next, |
3628 | ext4_io_end_t, list); | 3619 | ext4_io_end_t, list); |
3629 | /* | 3620 | /* |
3630 | * Calling ext4_end_aio_dio_nolock() to convert completed | 3621 | * Calling ext4_end_io_nolock() to convert completed |
3631 | * IO to written. | 3622 | * IO to written. |
3632 | * | 3623 | * |
3633 | * When ext4_sync_file() is called, run_queue() may already | 3624 | * When ext4_sync_file() is called, run_queue() may already |
@@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode) | |||
3640 | * avoid double converting from both fsync and background work | 3631 | * avoid double converting from both fsync and background work |
3641 | * queue work. | 3632 | * queue work. |
3642 | */ | 3633 | */ |
3643 | ret = ext4_end_aio_dio_nolock(io); | 3634 | ret = ext4_end_io_nolock(io); |
3644 | if (ret < 0) | 3635 | if (ret < 0) |
3645 | ret2 = ret; | 3636 | ret2 = ret; |
3646 | else | 3637 | else |
@@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | |||
3662 | io->offset = 0; | 3653 | io->offset = 0; |
3663 | io->size = 0; | 3654 | io->size = 0; |
3664 | io->error = 0; | 3655 | io->error = 0; |
3665 | INIT_WORK(&io->work, ext4_end_aio_dio_work); | 3656 | INIT_WORK(&io->work, ext4_end_io_work); |
3666 | INIT_LIST_HEAD(&io->list); | 3657 | INIT_LIST_HEAD(&io->list); |
3667 | } | 3658 | } |
3668 | 3659 | ||
@@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3685 | size); | 3676 | size); |
3686 | 3677 | ||
3687 | /* if not aio dio with unwritten extents, just free io and return */ | 3678 | /* if not aio dio with unwritten extents, just free io and return */ |
3688 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | 3679 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3689 | ext4_free_io_end(io_end); | 3680 | ext4_free_io_end(io_end); |
3690 | iocb->private = NULL; | 3681 | iocb->private = NULL; |
3691 | return; | 3682 | return; |
@@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3700 | 3691 | ||
3701 | /* Add the io_end to per-inode completed aio dio list*/ | 3692 | /* Add the io_end to per-inode completed aio dio list*/ |
3702 | list_add_tail(&io_end->list, | 3693 | list_add_tail(&io_end->list, |
3703 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | 3694 | &EXT4_I(io_end->inode)->i_completed_io_list); |
3704 | iocb->private = NULL; | 3695 | iocb->private = NULL; |
3705 | } | 3696 | } |
3697 | |||
3706 | /* | 3698 | /* |
3707 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3699 | * For ext4 extent files, ext4 will do direct-io write to holes, |
3708 | * preallocated extents, and those write extend the file, no need to | 3700 | * preallocated extents, and those write extend the file, no need to |
@@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3772 | ret = blockdev_direct_IO(rw, iocb, inode, | 3764 | ret = blockdev_direct_IO(rw, iocb, inode, |
3773 | inode->i_sb->s_bdev, iov, | 3765 | inode->i_sb->s_bdev, iov, |
3774 | offset, nr_segs, | 3766 | offset, nr_segs, |
3775 | ext4_get_block_dio_write, | 3767 | ext4_get_block_write, |
3776 | ext4_end_io_dio); | 3768 | ext4_end_io_dio); |
3777 | if (iocb->private) | 3769 | if (iocb->private) |
3778 | EXT4_I(inode)->cur_aio_dio = NULL; | 3770 | EXT4_I(inode)->cur_aio_dio = NULL; |