aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c84
1 files changed, 38 insertions, 46 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 427f4690ad6d..28f116bdc405 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3468,7 +3468,7 @@ out:
3468 return ret; 3468 return ret;
3469} 3469}
3470 3470
3471static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3471static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3472 struct buffer_head *bh_result, int create) 3472 struct buffer_head *bh_result, int create)
3473{ 3473{
3474 handle_t *handle = NULL; 3474 handle_t *handle = NULL;
@@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
3476 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3476 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3477 int dio_credits; 3477 int dio_credits;
3478 3478
3479 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3479 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3480 inode->i_ino, create); 3480 inode->i_ino, create);
3481 /* 3481 /*
3482 * DIO VFS code passes create = 0 flag for write to 3482 * ext4_get_block in prepare for a DIO write or buffer write.
3483 * the middle of file. It does this to avoid block 3483 * We allocate an uninitialized extent if blocks haven't been allocated.
3484 * allocation for holes, to prevent expose stale data 3484 * The extent will be converted to initialized after IO complete.
3485 * out when there is parallel buffered read (which does
3486 * not hold the i_mutex lock) while direct IO write has
3487 * not completed. DIO request on holes finally falls back
3488 * to buffered IO for this reason.
3489 *
3490 * For ext4 extent based file, since we support fallocate,
3491 * new allocated extent as uninitialized, for holes, we
3492 * could fallocate blocks for holes, thus parallel
3493 * buffered IO read will zero out the page when read on
3494 * a hole while parallel DIO write to the hole has not completed.
3495 *
3496 * when we come here, we know it's a direct IO write to
3497 * to the middle of file (<i_size)
3498 * so it's safe to override the create flag from VFS.
3499 */ 3485 */
3500 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3486 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3501 3487
3502 if (max_blocks > DIO_MAX_BLOCKS) 3488 if (max_blocks > DIO_MAX_BLOCKS)
3503 max_blocks = DIO_MAX_BLOCKS; 3489 max_blocks = DIO_MAX_BLOCKS;
@@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io)
3524 iput(io->inode); 3510 iput(io->inode);
3525 kfree(io); 3511 kfree(io);
3526} 3512}
3527static void dump_aio_dio_list(struct inode * inode) 3513
3514static void dump_completed_IO(struct inode * inode)
3528{ 3515{
3529#ifdef EXT4_DEBUG 3516#ifdef EXT4_DEBUG
3530 struct list_head *cur, *before, *after; 3517 struct list_head *cur, *before, *after;
3531 ext4_io_end_t *io, *io0, *io1; 3518 ext4_io_end_t *io, *io0, *io1;
3532 3519
3533 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3520 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3534 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3521 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3535 return; 3522 return;
3536 } 3523 }
3537 3524
3538 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3525 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3539 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3526 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3540 cur = &io->list; 3527 cur = &io->list;
3541 before = cur->prev; 3528 before = cur->prev;
3542 io0 = container_of(before, ext4_io_end_t, list); 3529 io0 = container_of(before, ext4_io_end_t, list);
@@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode)
3552/* 3539/*
3553 * check a range of space and convert unwritten extents to written. 3540 * check a range of space and convert unwritten extents to written.
3554 */ 3541 */
3555static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3542static int ext4_end_io_nolock(ext4_io_end_t *io)
3556{ 3543{
3557 struct inode *inode = io->inode; 3544 struct inode *inode = io->inode;
3558 loff_t offset = io->offset; 3545 loff_t offset = io->offset;
3559 ssize_t size = io->size; 3546 ssize_t size = io->size;
3560 int ret = 0; 3547 int ret = 0;
3561 3548
3562 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3549 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3563 "list->prev 0x%p\n", 3550 "list->prev 0x%p\n",
3564 io, inode->i_ino, io->list.next, io->list.prev); 3551 io, inode->i_ino, io->list.next, io->list.prev);
3565 3552
3566 if (list_empty(&io->list)) 3553 if (list_empty(&io->list))
3567 return ret; 3554 return ret;
3568 3555
3569 if (io->flag != DIO_AIO_UNWRITTEN) 3556 if (io->flag != EXT4_IO_UNWRITTEN)
3570 return ret; 3557 return ret;
3571 3558
3572 if (offset + size <= i_size_read(inode)) 3559 if (offset + size <= i_size_read(inode))
@@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3584 io->flag = 0; 3571 io->flag = 0;
3585 return ret; 3572 return ret;
3586} 3573}
3574
3587/* 3575/*
3588 * work on completed aio dio IO, to convert unwritten extents to extents 3576 * work on completed aio dio IO, to convert unwritten extents to extents
3589 */ 3577 */
3590static void ext4_end_aio_dio_work(struct work_struct *work) 3578static void ext4_end_io_work(struct work_struct *work)
3591{ 3579{
3592 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3580 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3593 struct inode *inode = io->inode; 3581 struct inode *inode = io->inode;
3594 int ret = 0; 3582 int ret = 0;
3595 3583
3596 mutex_lock(&inode->i_mutex); 3584 mutex_lock(&inode->i_mutex);
3597 ret = ext4_end_aio_dio_nolock(io); 3585 ret = ext4_end_io_nolock(io);
3598 if (ret >= 0) { 3586 if (ret >= 0) {
3599 if (!list_empty(&io->list)) 3587 if (!list_empty(&io->list))
3600 list_del_init(&io->list); 3588 list_del_init(&io->list);
@@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work)
3602 } 3590 }
3603 mutex_unlock(&inode->i_mutex); 3591 mutex_unlock(&inode->i_mutex);
3604} 3592}
3593
3605/* 3594/*
3606 * This function is called from ext4_sync_file(). 3595 * This function is called from ext4_sync_file().
3607 * 3596 *
3608 * When AIO DIO IO is completed, the work to convert unwritten 3597 * When IO is completed, the work to convert unwritten extents to
3609 * extents to written is queued on workqueue but may not get immediately 3598 * written is queued on workqueue but may not get immediately
3610 * scheduled. When fsync is called, we need to ensure the 3599 * scheduled. When fsync is called, we need to ensure the
3611 * conversion is complete before fsync returns. 3600 * conversion is complete before fsync returns.
3612 * The inode keeps track of a list of completed AIO from DIO path 3601 * The inode keeps track of a list of pending/completed IO that
3613 * that might needs to do the conversion. This function walks through 3602 * might needs to do the conversion. This function walks through
3614 * the list and convert the related unwritten extents to written. 3603 * the list and convert the related unwritten extents for completed IO
3604 * to written.
3605 * The function returns the number of pending IOs on success.
3615 */ 3606 */
3616int flush_aio_dio_completed_IO(struct inode *inode) 3607int flush_completed_IO(struct inode *inode)
3617{ 3608{
3618 ext4_io_end_t *io; 3609 ext4_io_end_t *io;
3619 int ret = 0; 3610 int ret = 0;
3620 int ret2 = 0; 3611 int ret2 = 0;
3621 3612
3622 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3613 if (list_empty(&EXT4_I(inode)->i_completed_io_list))
3623 return ret; 3614 return ret;
3624 3615
3625 dump_aio_dio_list(inode); 3616 dump_completed_IO(inode);
3626 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3617 while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){
3627 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3618 io = list_entry(EXT4_I(inode)->i_completed_io_list.next,
3628 ext4_io_end_t, list); 3619 ext4_io_end_t, list);
3629 /* 3620 /*
3630 * Calling ext4_end_aio_dio_nolock() to convert completed 3621 * Calling ext4_end_io_nolock() to convert completed
3631 * IO to written. 3622 * IO to written.
3632 * 3623 *
3633 * When ext4_sync_file() is called, run_queue() may already 3624 * When ext4_sync_file() is called, run_queue() may already
@@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3640 * avoid double converting from both fsync and background work 3631 * avoid double converting from both fsync and background work
3641 * queue work. 3632 * queue work.
3642 */ 3633 */
3643 ret = ext4_end_aio_dio_nolock(io); 3634 ret = ext4_end_io_nolock(io);
3644 if (ret < 0) 3635 if (ret < 0)
3645 ret2 = ret; 3636 ret2 = ret;
3646 else 3637 else
@@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3662 io->offset = 0; 3653 io->offset = 0;
3663 io->size = 0; 3654 io->size = 0;
3664 io->error = 0; 3655 io->error = 0;
3665 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3656 INIT_WORK(&io->work, ext4_end_io_work);
3666 INIT_LIST_HEAD(&io->list); 3657 INIT_LIST_HEAD(&io->list);
3667 } 3658 }
3668 3659
@@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3685 size); 3676 size);
3686 3677
3687 /* if not aio dio with unwritten extents, just free io and return */ 3678 /* if not aio dio with unwritten extents, just free io and return */
3688 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3679 if (io_end->flag != EXT4_IO_UNWRITTEN){
3689 ext4_free_io_end(io_end); 3680 ext4_free_io_end(io_end);
3690 iocb->private = NULL; 3681 iocb->private = NULL;
3691 return; 3682 return;
@@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3700 3691
3701 /* Add the io_end to per-inode completed aio dio list*/ 3692 /* Add the io_end to per-inode completed aio dio list*/
3702 list_add_tail(&io_end->list, 3693 list_add_tail(&io_end->list,
3703 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3694 &EXT4_I(io_end->inode)->i_completed_io_list);
3704 iocb->private = NULL; 3695 iocb->private = NULL;
3705} 3696}
3697
3706/* 3698/*
3707 * For ext4 extent files, ext4 will do direct-io write to holes, 3699 * For ext4 extent files, ext4 will do direct-io write to holes,
3708 * preallocated extents, and those write extend the file, no need to 3700 * preallocated extents, and those write extend the file, no need to
@@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3772 ret = blockdev_direct_IO(rw, iocb, inode, 3764 ret = blockdev_direct_IO(rw, iocb, inode,
3773 inode->i_sb->s_bdev, iov, 3765 inode->i_sb->s_bdev, iov,
3774 offset, nr_segs, 3766 offset, nr_segs,
3775 ext4_get_block_dio_write, 3767 ext4_get_block_write,
3776 ext4_end_io_dio); 3768 ext4_end_io_dio);
3777 if (iocb->private) 3769 if (iocb->private)
3778 EXT4_I(inode)->cur_aio_dio = NULL; 3770 EXT4_I(inode)->cur_aio_dio = NULL;