aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author Jiaying Zhang <jiayingz@google.com> 2010-03-02 13:28:44 -0500
committer Theodore Ts'o <tytso@mit.edu> 2010-03-02 13:28:44 -0500
commit c7064ef13b2181a489836349f9baf87df0dab28f (patch)
tree 433d86d9ed4285e5c5e7f24fbbaa7f48fad09381 /fs/ext4/inode.c
parent b8b8afe236e97b6359d46d3a3f8c46455e192271 (diff)
ext4: mechanical rename some of the direct I/O get_block's identifiers
This commit renames some of the direct I/O's block allocation flags, variables, and functions introduced in Mingming's "Direct IO for holes and fallocate" patches so that they can be used by ext4's buffered write path as well. Also changed the related function comments accordingly to cover both direct write and buffered write cases.

Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 84
1 files changed, 38 insertions, 46 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 427f4690ad6d..28f116bdc405 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3468,7 +3468,7 @@ out:
3468 return ret; 3468 return ret;
3469} 3469}
3470 3470
3471static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3471static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3472 struct buffer_head *bh_result, int create) 3472 struct buffer_head *bh_result, int create)
3473{ 3473{
3474 handle_t *handle = NULL; 3474 handle_t *handle = NULL;
@@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
3476 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3476 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3477 int dio_credits; 3477 int dio_credits;
3478 3478
3479 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3479 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3480 inode->i_ino, create); 3480 inode->i_ino, create);
3481 /* 3481 /*
3482 * DIO VFS code passes create = 0 flag for write to 3482 * ext4_get_block in prepare for a DIO write or buffer write.
3483 * the middle of file. It does this to avoid block 3483 * We allocate an uinitialized extent if blocks haven't been allocated.
3484 * allocation for holes, to prevent expose stale data 3484 * The extent will be converted to initialized after IO complete.
3485 * out when there is parallel buffered read (which does
3486 * not hold the i_mutex lock) while direct IO write has
3487 * not completed. DIO request on holes finally falls back
3488 * to buffered IO for this reason.
3489 *
3490 * For ext4 extent based file, since we support fallocate,
3491 * new allocated extent as uninitialized, for holes, we
3492 * could fallocate blocks for holes, thus parallel
3493 * buffered IO read will zero out the page when read on
3494 * a hole while parallel DIO write to the hole has not completed.
3495 *
3496 * when we come here, we know it's a direct IO write to
3497 * to the middle of file (<i_size)
3498 * so it's safe to override the create flag from VFS.
3499 */ 3485 */
3500 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3486 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3501 3487
3502 if (max_blocks > DIO_MAX_BLOCKS) 3488 if (max_blocks > DIO_MAX_BLOCKS)
3503 max_blocks = DIO_MAX_BLOCKS; 3489 max_blocks = DIO_MAX_BLOCKS;
@@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io)
3524 iput(io->inode); 3510 iput(io->inode);
3525 kfree(io); 3511 kfree(io);
3526} 3512}
3527static void dump_aio_dio_list(struct inode * inode) 3513
3514static void dump_completed_IO(struct inode * inode)
3528{ 3515{
3529#ifdef EXT4_DEBUG 3516#ifdef EXT4_DEBUG
3530 struct list_head *cur, *before, *after; 3517 struct list_head *cur, *before, *after;
3531 ext4_io_end_t *io, *io0, *io1; 3518 ext4_io_end_t *io, *io0, *io1;
3532 3519
3533 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3520 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3534 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3521 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3535 return; 3522 return;
3536 } 3523 }
3537 3524
3538 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3525 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3539 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3526 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3540 cur = &io->list; 3527 cur = &io->list;
3541 before = cur->prev; 3528 before = cur->prev;
3542 io0 = container_of(before, ext4_io_end_t, list); 3529 io0 = container_of(before, ext4_io_end_t, list);
@@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode)
3552/* 3539/*
3553 * check a range of space and convert unwritten extents to written. 3540 * check a range of space and convert unwritten extents to written.
3554 */ 3541 */
3555static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3542static int ext4_end_io_nolock(ext4_io_end_t *io)
3556{ 3543{
3557 struct inode *inode = io->inode; 3544 struct inode *inode = io->inode;
3558 loff_t offset = io->offset; 3545 loff_t offset = io->offset;
3559 ssize_t size = io->size; 3546 ssize_t size = io->size;
3560 int ret = 0; 3547 int ret = 0;
3561 3548
3562 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3549 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3563 "list->prev 0x%p\n", 3550 "list->prev 0x%p\n",
3564 io, inode->i_ino, io->list.next, io->list.prev); 3551 io, inode->i_ino, io->list.next, io->list.prev);
3565 3552
3566 if (list_empty(&io->list)) 3553 if (list_empty(&io->list))
3567 return ret; 3554 return ret;
3568 3555
3569 if (io->flag != DIO_AIO_UNWRITTEN) 3556 if (io->flag != EXT4_IO_UNWRITTEN)
3570 return ret; 3557 return ret;
3571 3558
3572 if (offset + size <= i_size_read(inode)) 3559 if (offset + size <= i_size_read(inode))
@@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3584 io->flag = 0; 3571 io->flag = 0;
3585 return ret; 3572 return ret;
3586} 3573}
3574
3587/* 3575/*
3588 * work on completed aio dio IO, to convert unwritten extents to extents 3576 * work on completed aio dio IO, to convert unwritten extents to extents
3589 */ 3577 */
3590static void ext4_end_aio_dio_work(struct work_struct *work) 3578static void ext4_end_io_work(struct work_struct *work)
3591{ 3579{
3592 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3580 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3593 struct inode *inode = io->inode; 3581 struct inode *inode = io->inode;
3594 int ret = 0; 3582 int ret = 0;
3595 3583
3596 mutex_lock(&inode->i_mutex); 3584 mutex_lock(&inode->i_mutex);
3597 ret = ext4_end_aio_dio_nolock(io); 3585 ret = ext4_end_io_nolock(io);
3598 if (ret >= 0) { 3586 if (ret >= 0) {
3599 if (!list_empty(&io->list)) 3587 if (!list_empty(&io->list))
3600 list_del_init(&io->list); 3588 list_del_init(&io->list);
@@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work)
3602 } 3590 }
3603 mutex_unlock(&inode->i_mutex); 3591 mutex_unlock(&inode->i_mutex);
3604} 3592}
3593
3605/* 3594/*
3606 * This function is called from ext4_sync_file(). 3595 * This function is called from ext4_sync_file().
3607 * 3596 *
3608 * When AIO DIO IO is completed, the work to convert unwritten 3597 * When IO is completed, the work to convert unwritten extents to
3609 * extents to written is queued on workqueue but may not get immediately 3598 * written is queued on workqueue but may not get immediately
3610 * scheduled. When fsync is called, we need to ensure the 3599 * scheduled. When fsync is called, we need to ensure the
3611 * conversion is complete before fsync returns. 3600 * conversion is complete before fsync returns.
3612 * The inode keeps track of a list of completed AIO from DIO path 3601 * The inode keeps track of a list of pending/completed IO that
3613 * that might needs to do the conversion. This function walks through 3602 * might needs to do the conversion. This function walks through
3614 * the list and convert the related unwritten extents to written. 3603 * the list and convert the related unwritten extents for completed IO
3604 * to written.
3605 * The function return the number of pending IOs on success.
3615 */ 3606 */
3616int flush_aio_dio_completed_IO(struct inode *inode) 3607int flush_completed_IO(struct inode *inode)
3617{ 3608{
3618 ext4_io_end_t *io; 3609 ext4_io_end_t *io;
3619 int ret = 0; 3610 int ret = 0;
3620 int ret2 = 0; 3611 int ret2 = 0;
3621 3612
3622 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3613 if (list_empty(&EXT4_I(inode)->i_completed_io_list))
3623 return ret; 3614 return ret;
3624 3615
3625 dump_aio_dio_list(inode); 3616 dump_completed_IO(inode);
3626 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3617 while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){
3627 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3618 io = list_entry(EXT4_I(inode)->i_completed_io_list.next,
3628 ext4_io_end_t, list); 3619 ext4_io_end_t, list);
3629 /* 3620 /*
3630 * Calling ext4_end_aio_dio_nolock() to convert completed 3621 * Calling ext4_end_io_nolock() to convert completed
3631 * IO to written. 3622 * IO to written.
3632 * 3623 *
3633 * When ext4_sync_file() is called, run_queue() may already 3624 * When ext4_sync_file() is called, run_queue() may already
@@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3640 * avoid double converting from both fsync and background work 3631 * avoid double converting from both fsync and background work
3641 * queue work. 3632 * queue work.
3642 */ 3633 */
3643 ret = ext4_end_aio_dio_nolock(io); 3634 ret = ext4_end_io_nolock(io);
3644 if (ret < 0) 3635 if (ret < 0)
3645 ret2 = ret; 3636 ret2 = ret;
3646 else 3637 else
@@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3662 io->offset = 0; 3653 io->offset = 0;
3663 io->size = 0; 3654 io->size = 0;
3664 io->error = 0; 3655 io->error = 0;
3665 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3656 INIT_WORK(&io->work, ext4_end_io_work);
3666 INIT_LIST_HEAD(&io->list); 3657 INIT_LIST_HEAD(&io->list);
3667 } 3658 }
3668 3659
@@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3685 size); 3676 size);
3686 3677
3687 /* if not aio dio with unwritten extents, just free io and return */ 3678 /* if not aio dio with unwritten extents, just free io and return */
3688 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3679 if (io_end->flag != EXT4_IO_UNWRITTEN){
3689 ext4_free_io_end(io_end); 3680 ext4_free_io_end(io_end);
3690 iocb->private = NULL; 3681 iocb->private = NULL;
3691 return; 3682 return;
@@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3700 3691
3701 /* Add the io_end to per-inode completed aio dio list*/ 3692 /* Add the io_end to per-inode completed aio dio list*/
3702 list_add_tail(&io_end->list, 3693 list_add_tail(&io_end->list,
3703 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3694 &EXT4_I(io_end->inode)->i_completed_io_list);
3704 iocb->private = NULL; 3695 iocb->private = NULL;
3705} 3696}
3697
3706/* 3698/*
3707 * For ext4 extent files, ext4 will do direct-io write to holes, 3699 * For ext4 extent files, ext4 will do direct-io write to holes,
3708 * preallocated extents, and those write extend the file, no need to 3700 * preallocated extents, and those write extend the file, no need to
@@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3772 ret = blockdev_direct_IO(rw, iocb, inode, 3764 ret = blockdev_direct_IO(rw, iocb, inode,
3773 inode->i_sb->s_bdev, iov, 3765 inode->i_sb->s_bdev, iov,
3774 offset, nr_segs, 3766 offset, nr_segs,
3775 ext4_get_block_dio_write, 3767 ext4_get_block_write,
3776 ext4_end_io_dio); 3768 ext4_end_io_dio);
3777 if (iocb->private) 3769 if (iocb->private)
3778 EXT4_I(inode)->cur_aio_dio = NULL; 3770 EXT4_I(inode)->cur_aio_dio = NULL;