diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 12 | ||||
-rw-r--r-- | fs/ext4/extents.c | 19 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 5 | ||||
-rw-r--r-- | fs/ext4/inode.c | 231 | ||||
-rw-r--r-- | fs/ext4/super.c | 8 |
5 files changed, 234 insertions, 41 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ccb4dbf359c4..b491576e11c3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -127,10 +127,11 @@ struct mpage_da_data { | |||
127 | int pages_written; | 127 | int pages_written; |
128 | int retval; | 128 | int retval; |
129 | }; | 129 | }; |
130 | 130 | #define DIO_AIO_UNWRITTEN 0x1 | |
131 | typedef struct ext4_io_end { | 131 | typedef struct ext4_io_end { |
132 | struct list_head list; /* per-file finished AIO list */ | ||
132 | struct inode *inode; /* file being written to */ | 133 | struct inode *inode; /* file being written to */ |
133 | unsigned int flag; /* sync IO or AIO */ | 134 | unsigned int flag; /* unwritten or not */ |
134 | int error; /* I/O error code */ | 135 | int error; /* I/O error code */ |
135 | ext4_lblk_t offset; /* offset in the file */ | 136 | ext4_lblk_t offset; /* offset in the file */ |
136 | size_t size; /* size of the extent */ | 137 | size_t size; /* size of the extent */ |
@@ -690,6 +691,11 @@ struct ext4_inode_info { | |||
690 | __u16 i_extra_isize; | 691 | __u16 i_extra_isize; |
691 | 692 | ||
692 | spinlock_t i_block_reservation_lock; | 693 | spinlock_t i_block_reservation_lock; |
694 | |||
695 | /* completed async DIOs that might need unwritten extents handling */ | ||
696 | struct list_head i_aio_dio_complete_list; | ||
697 | /* current io_end structure for async DIO write*/ | ||
698 | ext4_io_end_t *cur_aio_dio; | ||
693 | }; | 699 | }; |
694 | 700 | ||
695 | /* | 701 | /* |
@@ -1419,7 +1425,7 @@ extern int ext4_block_truncate_page(handle_t *handle, | |||
1419 | struct address_space *mapping, loff_t from); | 1425 | struct address_space *mapping, loff_t from); |
1420 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1426 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1421 | extern qsize_t ext4_get_reserved_space(struct inode *inode); | 1427 | extern qsize_t ext4_get_reserved_space(struct inode *inode); |
1422 | 1428 | extern int flush_aio_dio_completed_IO(struct inode *inode); | |
1423 | /* ioctl.c */ | 1429 | /* ioctl.c */ |
1424 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1430 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1425 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | 1431 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a38e651c004e..10a63096a95a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3033,6 +3033,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3033 | { | 3033 | { |
3034 | int ret = 0; | 3034 | int ret = 0; |
3035 | int err = 0; | 3035 | int err = 0; |
3036 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | ||
3036 | 3037 | ||
3037 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" | 3038 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" |
3038 | "block %llu, max_blocks %u, flags %d, allocated %u", | 3039 | "block %llu, max_blocks %u, flags %d, allocated %u", |
@@ -3045,6 +3046,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3045 | ret = ext4_split_unwritten_extents(handle, | 3046 | ret = ext4_split_unwritten_extents(handle, |
3046 | inode, path, iblock, | 3047 | inode, path, iblock, |
3047 | max_blocks, flags); | 3048 | max_blocks, flags); |
3049 | /* flag the io_end struct: conversion is needed when IO is done */ | ||
3050 | if (io) | ||
3051 | io->flag = DIO_AIO_UNWRITTEN; | ||
3048 | goto out; | 3052 | goto out; |
3049 | } | 3053 | } |
3050 | /* DIO end_io complete, convert the filled extent to written */ | 3054 | /* DIO end_io complete, convert the filled extent to written */ |
@@ -3130,6 +3134,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3130 | int err = 0, depth, ret, cache_type; | 3134 | int err = 0, depth, ret, cache_type; |
3131 | unsigned int allocated = 0; | 3135 | unsigned int allocated = 0; |
3132 | struct ext4_allocation_request ar; | 3136 | struct ext4_allocation_request ar; |
3137 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | ||
3133 | 3138 | ||
3134 | __clear_bit(BH_New, &bh_result->b_state); | 3139 | __clear_bit(BH_New, &bh_result->b_state); |
3135 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3140 | ext_debug("blocks %u/%u requested for inode %lu\n", |
@@ -3279,8 +3284,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3279 | /* try to insert new extent into found leaf and return */ | 3284 | /* try to insert new extent into found leaf and return */ |
3280 | ext4_ext_store_pblock(&newex, newblock); | 3285 | ext4_ext_store_pblock(&newex, newblock); |
3281 | newex.ee_len = cpu_to_le16(ar.len); | 3286 | newex.ee_len = cpu_to_le16(ar.len); |
3282 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ | 3287 | /* Mark uninitialized */ |
3288 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ | ||
3283 | ext4_ext_mark_uninitialized(&newex); | 3289 | ext4_ext_mark_uninitialized(&newex); |
3290 | /* | ||
3291 | * io_end structure was created for every async | ||
3292 | * direct IO write to the middle of the file. | ||
3293 | * To avoid unnecessary conversion for every aio dio rewrite | ||
3294 | * to the middle of the file, here we flag the IO that really | ||
3295 | * needs the conversion. | ||
3296 | * | ||
3297 | */ | ||
3298 | if (io && flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) | ||
3299 | io->flag = DIO_AIO_UNWRITTEN; | ||
3300 | } | ||
3284 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3301 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3285 | if (err) { | 3302 | if (err) { |
3286 | /* free data blocks we just allocated */ | 3303 | /* free data blocks we just allocated */ |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 07475740b512..2b1531266ee2 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -44,6 +44,8 @@ | |||
44 | * | 44 | * |
45 | * What we do is just kick off a commit and wait on it. This will snapshot the | 45 | * What we do is just kick off a commit and wait on it. This will snapshot the |
46 | * inode to disk. | 46 | * inode to disk. |
47 | * | ||
48 | * i_mutex lock is held when entering and exiting this function | ||
47 | */ | 49 | */ |
48 | 50 | ||
49 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | 51 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
@@ -56,6 +58,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
56 | 58 | ||
57 | trace_ext4_sync_file(file, dentry, datasync); | 59 | trace_ext4_sync_file(file, dentry, datasync); |
58 | 60 | ||
61 | ret = flush_aio_dio_completed_IO(inode); | ||
62 | if (ret < 0) | ||
63 | goto out; | ||
59 | /* | 64 | /* |
60 | * data=writeback: | 65 | * data=writeback: |
61 | * The caller's filemap_fdatawrite()/wait will sync the data. | 66 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5633af6a7045..118e16ca91d7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3451,6 +3451,8 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | |||
3451 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3451 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
3452 | int dio_credits; | 3452 | int dio_credits; |
3453 | 3453 | ||
3454 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | ||
3455 | inode->i_ino, create); | ||
3454 | /* | 3456 | /* |
3455 | * DIO VFS code passes create = 0 flag for write to | 3457 | * DIO VFS code passes create = 0 flag for write to |
3456 | * the middle of file. It does this to avoid block | 3458 | * the middle of file. It does this to avoid block |
@@ -3491,55 +3493,152 @@ out: | |||
3491 | return ret; | 3493 | return ret; |
3492 | } | 3494 | } |
3493 | 3495 | ||
3494 | #define DIO_AIO 0x1 | ||
3495 | |||
3496 | static void ext4_free_io_end(ext4_io_end_t *io) | 3496 | static void ext4_free_io_end(ext4_io_end_t *io) |
3497 | { | 3497 | { |
3498 | BUG_ON(!io); | ||
3499 | iput(io->inode); | ||
3498 | kfree(io); | 3500 | kfree(io); |
3499 | } | 3501 | } |
3502 | static void dump_aio_dio_list(struct inode * inode) | ||
3503 | { | ||
3504 | #ifdef EXT4_DEBUG | ||
3505 | struct list_head *cur, *before, *after; | ||
3506 | ext4_io_end_t *io, *io0, *io1; | ||
3507 | |||
3508 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | ||
3509 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | ||
3510 | return; | ||
3511 | } | ||
3512 | |||
3513 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | ||
3514 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | ||
3515 | cur = &io->list; | ||
3516 | before = cur->prev; | ||
3517 | io0 = container_of(before, ext4_io_end_t, list); | ||
3518 | after = cur->next; | ||
3519 | io1 = container_of(after, ext4_io_end_t, list); | ||
3520 | |||
3521 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3522 | io, inode->i_ino, io0, io1); | ||
3523 | } | ||
3524 | #endif | ||
3525 | } | ||
3500 | 3526 | ||
3501 | /* | 3527 | /* |
3502 | * IO write completion for unwritten extents. | ||
3503 | * | ||
3504 | * check a range of space and convert unwritten extents to written. | 3528 | * check a range of space and convert unwritten extents to written. |
3505 | */ | 3529 | */ |
3506 | static void ext4_end_dio_unwritten(struct work_struct *work) | 3530 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) |
3507 | { | 3531 | { |
3508 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3509 | struct inode *inode = io->inode; | 3532 | struct inode *inode = io->inode; |
3510 | loff_t offset = io->offset; | 3533 | loff_t offset = io->offset; |
3511 | size_t size = io->size; | 3534 | size_t size = io->size; |
3512 | int ret = 0; | 3535 | int ret = 0; |
3513 | int aio = io->flag & DIO_AIO; | ||
3514 | 3536 | ||
3515 | if (aio) | 3537 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," |
3516 | mutex_lock(&inode->i_mutex); | 3538 | "list->prev 0x%p\n", |
3539 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3540 | |||
3541 | if (list_empty(&io->list)) | ||
3542 | return ret; | ||
3543 | |||
3544 | if (io->flag != DIO_AIO_UNWRITTEN) | ||
3545 | return ret; | ||
3546 | |||
3517 | if (offset + size <= i_size_read(inode)) | 3547 | if (offset + size <= i_size_read(inode)) |
3518 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 3548 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
3519 | 3549 | ||
3520 | if (ret < 0) | 3550 | if (ret < 0) { |
3521 | printk(KERN_EMERG "%s: failed to convert unwritten" | 3551 | printk(KERN_EMERG "%s: failed to convert unwritten" |
3522 | "extents to written extents, error is %d\n", | 3552 | "extents to written extents, error is %d" |
3523 | __func__, ret); | 3553 | " io is still on inode %lu aio dio list\n", |
3554 | __func__, ret, inode->i_ino); | ||
3555 | return ret; | ||
3556 | } | ||
3557 | |||
3558 | /* clear the DIO AIO unwritten flag */ | ||
3559 | io->flag = 0; | ||
3560 | return ret; | ||
3561 | } | ||
3562 | /* | ||
3563 | * work on completed aio dio IO, to convert unwritten extents to written extents | ||
3564 | */ | ||
3565 | static void ext4_end_aio_dio_work(struct work_struct *work) | ||
3566 | { | ||
3567 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3568 | struct inode *inode = io->inode; | ||
3569 | int ret = 0; | ||
3570 | |||
3571 | mutex_lock(&inode->i_mutex); | ||
3572 | ret = ext4_end_aio_dio_nolock(io); | ||
3573 | if (ret >= 0) { | ||
3574 | if (!list_empty(&io->list)) | ||
3575 | list_del_init(&io->list); | ||
3576 | ext4_free_io_end(io); | ||
3577 | } | ||
3578 | mutex_unlock(&inode->i_mutex); | ||
3579 | } | ||
3580 | /* | ||
3581 | * This function is called from ext4_sync_file(). | ||
3582 | * | ||
3583 | * When AIO DIO IO is completed, the work to convert unwritten | ||
3584 | * extents to written is queued on workqueue but may not get immediately | ||
3585 | * scheduled. When fsync is called, we need to ensure the | ||
3586 | * conversion is complete before fsync returns. | ||
3587 | * The inode keeps track of a list of completed AIO from DIO path | ||
3588 | * that might need to do the conversion. This function walks through | ||
3589 | * the list and converts the related unwritten extents to written. | ||
3590 | */ | ||
3591 | int flush_aio_dio_completed_IO(struct inode *inode) | ||
3592 | { | ||
3593 | ext4_io_end_t *io; | ||
3594 | int ret = 0; | ||
3595 | int ret2 = 0; | ||
3524 | 3596 | ||
3525 | ext4_free_io_end(io); | 3597 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) |
3526 | if (aio) | 3598 | return ret; |
3527 | mutex_unlock(&inode->i_mutex); | 3599 | |
3600 | dump_aio_dio_list(inode); | ||
3601 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | ||
3602 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | ||
3603 | ext4_io_end_t, list); | ||
3604 | /* | ||
3605 | * Calling ext4_end_aio_dio_nolock() to convert completed | ||
3606 | * IO to written. | ||
3607 | * | ||
3608 | * When ext4_sync_file() is called, run_queue() may already be | ||
3609 | * about to flush the work corresponding to this io structure. | ||
3610 | * It will be upset if it finds that the io structure related | ||
3611 | * to the work to be scheduled has been freed. | ||
3612 | * | ||
3613 | * Thus we need to keep the io structure valid here after the | ||
3614 | * conversion has finished. The io structure has a flag to | ||
3615 | * avoid double conversion from both fsync and the background | ||
3616 | * work queue. | ||
3617 | */ | ||
3618 | ret = ext4_end_aio_dio_nolock(io); | ||
3619 | if (ret < 0) | ||
3620 | ret2 = ret; | ||
3621 | else | ||
3622 | list_del_init(&io->list); | ||
3623 | } | ||
3624 | return (ret2 < 0) ? ret2 : 0; | ||
3528 | } | 3625 | } |
3529 | 3626 | ||
3530 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, unsigned int flag) | 3627 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode) |
3531 | { | 3628 | { |
3532 | ext4_io_end_t *io = NULL; | 3629 | ext4_io_end_t *io = NULL; |
3533 | 3630 | ||
3534 | io = kmalloc(sizeof(*io), GFP_NOFS); | 3631 | io = kmalloc(sizeof(*io), GFP_NOFS); |
3535 | 3632 | ||
3536 | if (io) { | 3633 | if (io) { |
3634 | igrab(inode); | ||
3537 | io->inode = inode; | 3635 | io->inode = inode; |
3538 | io->flag = flag; | 3636 | io->flag = 0; |
3539 | io->offset = 0; | 3637 | io->offset = 0; |
3540 | io->size = 0; | 3638 | io->size = 0; |
3541 | io->error = 0; | 3639 | io->error = 0; |
3542 | INIT_WORK(&io->work, ext4_end_dio_unwritten); | 3640 | INIT_WORK(&io->work, ext4_end_aio_dio_work); |
3641 | INIT_LIST_HEAD(&io->list); | ||
3543 | } | 3642 | } |
3544 | 3643 | ||
3545 | return io; | 3644 | return io; |
@@ -3551,19 +3650,31 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3551 | ext4_io_end_t *io_end = iocb->private; | 3650 | ext4_io_end_t *io_end = iocb->private; |
3552 | struct workqueue_struct *wq; | 3651 | struct workqueue_struct *wq; |
3553 | 3652 | ||
3554 | /* if not hole or unwritten extents, just simple return */ | 3653 | ext_debug("ext4_end_io_dio(): io_end 0x%p" |
3555 | if (!io_end || !size || !iocb->private) | 3654 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
3655 | iocb->private, io_end->inode->i_ino, iocb, offset, | ||
3656 | size); | ||
3657 | /* if not async direct IO or dio with 0 bytes write, just return */ | ||
3658 | if (!io_end || !size) | ||
3556 | return; | 3659 | return; |
3660 | |||
3661 | /* if not aio dio with unwritten extents, just free io and return */ | ||
3662 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | ||
3663 | ext4_free_io_end(io_end); | ||
3664 | iocb->private = NULL; | ||
3665 | return; | ||
3666 | } | ||
3667 | |||
3557 | io_end->offset = offset; | 3668 | io_end->offset = offset; |
3558 | io_end->size = size; | 3669 | io_end->size = size; |
3559 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3670 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3560 | 3671 | ||
3561 | /* We need to convert unwritten extents to written */ | 3672 | /* queue the work to convert unwritten extents to written */ |
3562 | queue_work(wq, &io_end->work); | 3673 | queue_work(wq, &io_end->work); |
3563 | 3674 | ||
3564 | if (is_sync_kiocb(iocb)) | 3675 | /* Add the io_end to per-inode completed aio dio list*/ |
3565 | flush_workqueue(wq); | 3676 | list_add_tail(&io_end->list, |
3566 | 3677 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | |
3567 | iocb->private = NULL; | 3678 | iocb->private = NULL; |
3568 | } | 3679 | } |
3569 | /* | 3680 | /* |
@@ -3575,8 +3686,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3575 | * If those blocks were preallocated, we mark sure they are splited, but | 3686 | * If those blocks were preallocated, we mark sure they are splited, but |
3576 | * still keep the range to write as unintialized. | 3687 | * still keep the range to write as unintialized. |
3577 | * | 3688 | * |
3578 | * When end_io call back function called at the last IO complete time, | 3689 | * The unwritten extents will be converted to written when DIO is completed. |
3579 | * those extents will be converted to written extents. | 3690 | * For async direct IO, since the IO may still be pending on return, we |
3691 | * set up an end_io call back function, which will do the conversion | ||
3692 | * when the async direct IO has completed. | ||
3580 | * | 3693 | * |
3581 | * If the O_DIRECT write will extend the file then add this inode to the | 3694 | * If the O_DIRECT write will extend the file then add this inode to the |
3582 | * orphan list. So recovery will truncate it back to the original size | 3695 | * orphan list. So recovery will truncate it back to the original size |
@@ -3595,28 +3708,76 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3595 | loff_t final_size = offset + count; | 3708 | loff_t final_size = offset + count; |
3596 | if (rw == WRITE && final_size <= inode->i_size) { | 3709 | if (rw == WRITE && final_size <= inode->i_size) { |
3597 | /* | 3710 | /* |
3598 | * For DIO we fallocate blocks for holes, we fallocate blocks | 3711 | * We could direct write to holes and fallocate. |
3599 | * The fallocated extent for hole is marked as uninitialized | 3712 | * |
3713 | * Allocated blocks to fill the hole are marked as uninitialized | ||
3600 | * to prevent paralel buffered read to expose the stale data | 3714 | * to prevent paralel buffered read to expose the stale data |
3601 | * before DIO complete the data IO. | 3715 | * before DIO complete the data IO. |
3602 | * as for previously fallocated extents, ext4 get_block | 3716 | * |
3717 | * As to previously fallocated extents, ext4 get_block | ||
3603 | * will just simply mark the buffer mapped but still | 3718 | * will just simply mark the buffer mapped but still |
3604 | * keep the extents uninitialized. | 3719 | * keep the extents uninitialized. |
3605 | * | 3720 | * |
3606 | * At the end of IO, the ext4 end_io callback function | 3721 | * for non AIO case, we will convert those unwritten extents |
3607 | * will convert those unwritten extents to written, | 3722 | * to written after return back from blockdev_direct_IO. |
3608 | * | 3723 | * |
3724 | * for async DIO, the conversion needs to be deferred until | ||
3725 | * the IO is completed. The ext4 end_io callback function | ||
3726 | * will be called to take care of the conversion work. | ||
3727 | * Here for async case, we allocate an io_end structure to | ||
3728 | * hook to the iocb. | ||
3609 | */ | 3729 | */ |
3610 | iocb->private = ext4_init_io_end(inode, !is_sync_kiocb(iocb)); | 3730 | iocb->private = NULL; |
3611 | if (!iocb->private) | 3731 | EXT4_I(inode)->cur_aio_dio = NULL; |
3612 | return -ENOMEM; | 3732 | if (!is_sync_kiocb(iocb)) { |
3733 | iocb->private = ext4_init_io_end(inode); | ||
3734 | if (!iocb->private) | ||
3735 | return -ENOMEM; | ||
3736 | /* | ||
3737 | * we save the io structure for current async | ||
3738 | * direct IO, so that later ext4_get_blocks() | ||
3739 | * could flag the io structure whether there | ||
3740 | * is a unwritten extents needs to be converted | ||
3741 | * when IO is completed. | ||
3742 | */ | ||
3743 | EXT4_I(inode)->cur_aio_dio = iocb->private; | ||
3744 | } | ||
3745 | |||
3613 | ret = blockdev_direct_IO(rw, iocb, inode, | 3746 | ret = blockdev_direct_IO(rw, iocb, inode, |
3614 | inode->i_sb->s_bdev, iov, | 3747 | inode->i_sb->s_bdev, iov, |
3615 | offset, nr_segs, | 3748 | offset, nr_segs, |
3616 | ext4_get_block_dio_write, | 3749 | ext4_get_block_dio_write, |
3617 | ext4_end_io_dio); | 3750 | ext4_end_io_dio); |
3751 | if (iocb->private) | ||
3752 | EXT4_I(inode)->cur_aio_dio = NULL; | ||
3753 | /* | ||
3754 | * The io_end structure takes a reference to the inode, | ||
3755 | * that structure needs to be destroyed and the | ||
3756 | * reference to the inode need to be dropped, when IO is | ||
3757 | * complete, even with 0 byte write, or failed. | ||
3758 | * | ||
3759 | * In the successful AIO DIO case, the io_end structure will be | ||
3760 | * destroyed and the reference to the inode will be dropped | ||
3761 | * after the end_io call back function is called. | ||
3762 | * | ||
3763 | * In the case there is 0 byte write, or error case, since | ||
3764 | * VFS direct IO won't invoke the end_io call back function, | ||
3765 | * we need to free the end_io structure here. | ||
3766 | */ | ||
3767 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | ||
3768 | ext4_free_io_end(iocb->private); | ||
3769 | iocb->private = NULL; | ||
3770 | } else if (ret > 0) | ||
3771 | /* | ||
3772 | * for non AIO case, since the IO is already | ||
3773 | * completed, we could do the convertion right here | ||
3774 | */ | ||
3775 | ret = ext4_convert_unwritten_extents(inode, | ||
3776 | offset, ret); | ||
3618 | return ret; | 3777 | return ret; |
3619 | } | 3778 | } |
3779 | |||
3780 | /* for the write to the end of file case, we fall back to the old way */ | ||
3620 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3781 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3621 | } | 3782 | } |
3622 | 3783 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1a03ea98fdd1..f095c60b569e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -687,6 +687,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
687 | ei->i_allocated_meta_blocks = 0; | 687 | ei->i_allocated_meta_blocks = 0; |
688 | ei->i_delalloc_reserved_flag = 0; | 688 | ei->i_delalloc_reserved_flag = 0; |
689 | spin_lock_init(&(ei->i_block_reservation_lock)); | 689 | spin_lock_init(&(ei->i_block_reservation_lock)); |
690 | INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); | ||
691 | ei->cur_aio_dio = NULL; | ||
690 | 692 | ||
691 | return &ei->vfs_inode; | 693 | return &ei->vfs_inode; |
692 | } | 694 | } |
@@ -3375,11 +3377,13 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
3375 | { | 3377 | { |
3376 | int ret = 0; | 3378 | int ret = 0; |
3377 | tid_t target; | 3379 | tid_t target; |
3380 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
3378 | 3381 | ||
3379 | trace_ext4_sync_fs(sb, wait); | 3382 | trace_ext4_sync_fs(sb, wait); |
3380 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 3383 | flush_workqueue(sbi->dio_unwritten_wq); |
3384 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | ||
3381 | if (wait) | 3385 | if (wait) |
3382 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); | 3386 | jbd2_log_wait_commit(sbi->s_journal, target); |
3383 | } | 3387 | } |
3384 | return ret; | 3388 | return ret; |
3385 | } | 3389 | } |