diff options
-rw-r--r-- | fs/ext4/ext4.h | 10 | ||||
-rw-r--r-- | fs/ext4/extents.c | 10 | ||||
-rw-r--r-- | fs/ext4/file.c | 60 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 25 | ||||
-rw-r--r-- | fs/ext4/super.c | 13 |
5 files changed, 100 insertions, 18 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0c8d97b56f34..3aa0b72b3b94 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -848,6 +848,7 @@ struct ext4_inode_info { | |||
848 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 848 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
849 | /* current io_end structure for async DIO write*/ | 849 | /* current io_end structure for async DIO write*/ |
850 | ext4_io_end_t *cur_aio_dio; | 850 | ext4_io_end_t *cur_aio_dio; |
851 | atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ | ||
851 | 852 | ||
852 | spinlock_t i_block_reservation_lock; | 853 | spinlock_t i_block_reservation_lock; |
853 | 854 | ||
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2119 | 2120 | ||
2120 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2121 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2121 | 2122 | ||
2123 | /* For ioend & aio unwritten conversion wait queues */ | ||
2124 | #define EXT4_WQ_HASH_SZ 37 | ||
2125 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ | ||
2126 | EXT4_WQ_HASH_SZ]) | ||
2127 | #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ | ||
2128 | EXT4_WQ_HASH_SZ]) | ||
2129 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
2130 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
2131 | |||
2122 | #endif /* __KERNEL__ */ | 2132 | #endif /* __KERNEL__ */ |
2123 | 2133 | ||
2124 | #endif /* _EXT4_H */ | 2134 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 63a75810b7c3..ccce8a7e94ed 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3174 | * that this IO needs to convertion to written when IO is | 3174 | * that this IO needs to convertion to written when IO is |
3175 | * completed | 3175 | * completed |
3176 | */ | 3176 | */ |
3177 | if (io) | 3177 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3178 | io->flag = EXT4_IO_END_UNWRITTEN; | 3178 | io->flag = EXT4_IO_END_UNWRITTEN; |
3179 | else | 3179 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3180 | } else | ||
3180 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3181 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3181 | if (ext4_should_dioread_nolock(inode)) | 3182 | if (ext4_should_dioread_nolock(inode)) |
3182 | map->m_flags |= EXT4_MAP_UNINIT; | 3183 | map->m_flags |= EXT4_MAP_UNINIT; |
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3463 | * that we need to perform convertion when IO is done. | 3464 | * that we need to perform convertion when IO is done. |
3464 | */ | 3465 | */ |
3465 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3466 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3466 | if (io) | 3467 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3467 | io->flag = EXT4_IO_END_UNWRITTEN; | 3468 | io->flag = EXT4_IO_END_UNWRITTEN; |
3468 | else | 3469 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3470 | } else | ||
3469 | ext4_set_inode_state(inode, | 3471 | ext4_set_inode_state(inode, |
3470 | EXT4_STATE_DIO_UNWRITTEN); | 3472 | EXT4_STATE_DIO_UNWRITTEN); |
3471 | } | 3473 | } |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2e8322c8aa88..7b80d543b89e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void ext4_aiodio_wait(struct inode *inode) | ||
59 | { | ||
60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | ||
61 | |||
62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This tests whether the IO in question is block-aligned or not. | ||
67 | * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they | ||
68 | * are converted to written only after the IO is complete. Until they are | ||
69 | * mapped, these blocks appear as holes, so dio_zero_block() will assume that | ||
70 | * it needs to zero out portions of the start and/or end block. If 2 AIO | ||
71 | * threads are at work on the same unwritten block, they must be synchronized | ||
72 | * or one thread will zero the other's data, causing corruption. | ||
73 | */ | ||
74 | static int | ||
75 | ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, | ||
76 | unsigned long nr_segs, loff_t pos) | ||
77 | { | ||
78 | struct super_block *sb = inode->i_sb; | ||
79 | int blockmask = sb->s_blocksize - 1; | ||
80 | size_t count = iov_length(iov, nr_segs); | ||
81 | loff_t final_size = pos + count; | ||
82 | |||
83 | if (pos >= inode->i_size) | ||
84 | return 0; | ||
85 | |||
86 | if ((pos & blockmask) || (final_size & blockmask)) | ||
87 | return 1; | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
58 | static ssize_t | 92 | static ssize_t |
59 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 93 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, |
60 | unsigned long nr_segs, loff_t pos) | 94 | unsigned long nr_segs, loff_t pos) |
61 | { | 95 | { |
62 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
97 | int unaligned_aio = 0; | ||
98 | int ret; | ||
63 | 99 | ||
64 | /* | 100 | /* |
65 | * If we have encountered a bitmap-format file, the size limit | 101 | * If we have encountered a bitmap-format file, the size limit |
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
78 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, | 114 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, |
79 | sbi->s_bitmap_maxbytes - pos); | 115 | sbi->s_bitmap_maxbytes - pos); |
80 | } | 116 | } |
117 | } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && | ||
118 | !is_sync_kiocb(iocb))) { | ||
119 | unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); | ||
81 | } | 120 | } |
82 | 121 | ||
83 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 122 | /* Unaligned direct AIO must be serialized; see comment above */ |
123 | if (unaligned_aio) { | ||
124 | static unsigned long unaligned_warn_time; | ||
125 | |||
126 | /* Warn about this once per day */ | ||
127 | if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) | ||
128 | ext4_msg(inode->i_sb, KERN_WARNING, | ||
129 | "Unaligned AIO/DIO on inode %ld by %s; " | ||
130 | "performance will be poor.", | ||
131 | inode->i_ino, current->comm); | ||
132 | mutex_lock(ext4_aio_mutex(inode)); | ||
133 | ext4_aiodio_wait(inode); | ||
134 | } | ||
135 | |||
136 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
137 | |||
138 | if (unaligned_aio) | ||
139 | mutex_unlock(ext4_aio_mutex(inode)); | ||
140 | |||
141 | return ret; | ||
84 | } | 142 | } |
85 | 143 | ||
86 | static const struct vm_operations_struct ext4_file_vm_ops = { | 144 | static const struct vm_operations_struct ext4_file_vm_ops = { |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4e9b0a242f4c..955cc309142f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -32,14 +32,8 @@ | |||
32 | 32 | ||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | 33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; |
34 | 34 | ||
35 | #define WQ_HASH_SZ 37 | ||
36 | #define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) | ||
37 | static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; | ||
38 | |||
39 | int __init ext4_init_pageio(void) | 35 | int __init ext4_init_pageio(void) |
40 | { | 36 | { |
41 | int i; | ||
42 | |||
43 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | 37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); |
44 | if (io_page_cachep == NULL) | 38 | if (io_page_cachep == NULL) |
45 | return -ENOMEM; | 39 | return -ENOMEM; |
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void) | |||
48 | kmem_cache_destroy(io_page_cachep); | 42 | kmem_cache_destroy(io_page_cachep); |
49 | return -ENOMEM; | 43 | return -ENOMEM; |
50 | } | 44 | } |
51 | for (i = 0; i < WQ_HASH_SZ; i++) | ||
52 | init_waitqueue_head(&ioend_wq[i]); | ||
53 | |||
54 | return 0; | 45 | return 0; |
55 | } | 46 | } |
56 | 47 | ||
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void) | |||
62 | 53 | ||
63 | void ext4_ioend_wait(struct inode *inode) | 54 | void ext4_ioend_wait(struct inode *inode) |
64 | { | 55 | { |
65 | wait_queue_head_t *wq = to_ioend_wq(inode); | 56 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
66 | 57 | ||
67 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 58 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); |
68 | } | 59 | } |
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
87 | for (i = 0; i < io->num_io_pages; i++) | 78 | for (i = 0; i < io->num_io_pages; i++) |
88 | put_io_page(io->pages[i]); | 79 | put_io_page(io->pages[i]); |
89 | io->num_io_pages = 0; | 80 | io->num_io_pages = 0; |
90 | wq = to_ioend_wq(io->inode); | 81 | wq = ext4_ioend_wq(io->inode); |
91 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | 82 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && |
92 | waitqueue_active(wq)) | 83 | waitqueue_active(wq)) |
93 | wake_up_all(wq); | 84 | wake_up_all(wq); |
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
102 | struct inode *inode = io->inode; | 93 | struct inode *inode = io->inode; |
103 | loff_t offset = io->offset; | 94 | loff_t offset = io->offset; |
104 | ssize_t size = io->size; | 95 | ssize_t size = io->size; |
96 | wait_queue_head_t *wq; | ||
105 | int ret = 0; | 97 | int ret = 0; |
106 | 98 | ||
107 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
126 | if (io->iocb) | 118 | if (io->iocb) |
127 | aio_complete(io->iocb, io->result, 0); | 119 | aio_complete(io->iocb, io->result, 0); |
128 | /* clear the DIO AIO unwritten flag */ | 120 | /* clear the DIO AIO unwritten flag */ |
129 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | 121 | if (io->flag & EXT4_IO_END_UNWRITTEN) { |
122 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
123 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
124 | wq = ext4_ioend_wq(io->inode); | ||
125 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && | ||
126 | waitqueue_active(wq)) { | ||
127 | wake_up_all(wq); | ||
128 | } | ||
129 | } | ||
130 | |||
130 | return ret; | 131 | return ret; |
131 | } | 132 | } |
132 | 133 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 86b05486dc63..f6a318f836b2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -833,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
833 | ei->i_sync_tid = 0; | 833 | ei->i_sync_tid = 0; |
834 | ei->i_datasync_tid = 0; | 834 | ei->i_datasync_tid = 0; |
835 | atomic_set(&ei->i_ioend_count, 0); | 835 | atomic_set(&ei->i_ioend_count, 0); |
836 | atomic_set(&ei->i_aiodio_unwritten, 0); | ||
836 | 837 | ||
837 | return &ei->vfs_inode; | 838 | return &ei->vfs_inode; |
838 | } | 839 | } |
@@ -4800,11 +4801,21 @@ static void ext4_exit_feat_adverts(void) | |||
4800 | kfree(ext4_feat); | 4801 | kfree(ext4_feat); |
4801 | } | 4802 | } |
4802 | 4803 | ||
4804 | /* Shared across all ext4 file systems */ | ||
4805 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
4806 | struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
4807 | |||
4803 | static int __init ext4_init_fs(void) | 4808 | static int __init ext4_init_fs(void) |
4804 | { | 4809 | { |
4805 | int err; | 4810 | int i, err; |
4806 | 4811 | ||
4807 | ext4_check_flag_values(); | 4812 | ext4_check_flag_values(); |
4813 | |||
4814 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { | ||
4815 | mutex_init(&ext4__aio_mutex[i]); | ||
4816 | init_waitqueue_head(&ext4__ioend_wq[i]); | ||
4817 | } | ||
4818 | |||
4808 | err = ext4_init_pageio(); | 4819 | err = ext4_init_pageio(); |
4809 | if (err) | 4820 | if (err) |
4810 | return err; | 4821 | return err; |