summaryrefslogtreecommitdiffstats
path: root/fs/ext4/page-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r--fs/ext4/page-io.c171
1 files changed, 113 insertions, 58 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 997002218228..5b24c407701b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -71,6 +71,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
71 int i; 71 int i;
72 72
73 BUG_ON(!io); 73 BUG_ON(!io);
74 BUG_ON(!list_empty(&io->list));
74 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
75 76
76 if (io->page) 77 if (io->page)
@@ -83,21 +84,14 @@ void ext4_free_io_end(ext4_io_end_t *io)
83 kmem_cache_free(io_end_cachep, io); 84 kmem_cache_free(io_end_cachep, io);
84} 85}
85 86
86/* 87/* check a range of space and convert unwritten extents to written. */
87 * check a range of space and convert unwritten extents to written. 88static int ext4_end_io(ext4_io_end_t *io)
88 *
89 * Called with inode->i_mutex; we depend on this when we manipulate
90 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
91 */
92int ext4_end_io_nolock(ext4_io_end_t *io)
93{ 89{
94 struct inode *inode = io->inode; 90 struct inode *inode = io->inode;
95 loff_t offset = io->offset; 91 loff_t offset = io->offset;
96 ssize_t size = io->size; 92 ssize_t size = io->size;
97 int ret = 0; 93 int ret = 0;
98 94
99 BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
100
101 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 95 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
102 "list->prev 0x%p\n", 96 "list->prev 0x%p\n",
103 io, inode->i_ino, io->list.next, io->list.prev); 97 io, inode->i_ino, io->list.next, io->list.prev);
@@ -110,7 +104,6 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
110 "(inode %lu, offset %llu, size %zd, error %d)", 104 "(inode %lu, offset %llu, size %zd, error %d)",
111 inode->i_ino, offset, size, ret); 105 inode->i_ino, offset, size, ret);
112 } 106 }
113 io->flag &= ~EXT4_IO_END_UNWRITTEN;
114 if (io->iocb) 107 if (io->iocb)
115 aio_complete(io->iocb, io->result, 0); 108 aio_complete(io->iocb, io->result, 0);
116 109
@@ -122,51 +115,122 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
122 return ret; 115 return ret;
123} 116}
124 117
125/* 118static void dump_completed_IO(struct inode *inode)
126 * work on completed aio dio IO, to convert unwritten extents to extents 119{
127 */ 120#ifdef EXT4FS_DEBUG
128static void ext4_end_io_work(struct work_struct *work) 121 struct list_head *cur, *before, *after;
122 ext4_io_end_t *io, *io0, *io1;
123 unsigned long flags;
124
125 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
126 ext4_debug("inode %lu completed_io list is empty\n",
127 inode->i_ino);
128 return;
129 }
130
131 ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
132 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
133 cur = &io->list;
134 before = cur->prev;
135 io0 = container_of(before, ext4_io_end_t, list);
136 after = cur->next;
137 io1 = container_of(after, ext4_io_end_t, list);
138
139 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
140 io, inode->i_ino, io0, io1);
141 }
142#endif
143}
144
145/* Add the io_end to per-inode completed end_io list. */
146void ext4_add_complete_io(ext4_io_end_t *io_end)
129{ 147{
130 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 148 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
131 struct inode *inode = io->inode; 149 struct workqueue_struct *wq;
132 struct ext4_inode_info *ei = EXT4_I(inode); 150 unsigned long flags;
133 unsigned long flags; 151
152 BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
153 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
134 154
135 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 155 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
136 if (io->flag & EXT4_IO_END_IN_FSYNC) 156 if (list_empty(&ei->i_completed_io_list)) {
137 goto requeue; 157 io_end->flag |= EXT4_IO_END_QUEUED;
138 if (list_empty(&io->list)) { 158 queue_work(wq, &io_end->work);
139 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
140 goto free;
141 } 159 }
160 list_add_tail(&io_end->list, &ei->i_completed_io_list);
161 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
162}
142 163
143 if (!mutex_trylock(&inode->i_mutex)) { 164static int ext4_do_flush_completed_IO(struct inode *inode,
144 bool was_queued; 165 ext4_io_end_t *work_io)
145requeue: 166{
146 was_queued = !!(io->flag & EXT4_IO_END_QUEUED); 167 ext4_io_end_t *io;
147 io->flag |= EXT4_IO_END_QUEUED; 168 struct list_head unwritten, complete, to_free;
148 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 169 unsigned long flags;
149 /* 170 struct ext4_inode_info *ei = EXT4_I(inode);
150 * Requeue the work instead of waiting so that the work 171 int err, ret = 0;
151 * items queued after this can be processed. 172
152 */ 173 INIT_LIST_HEAD(&complete);
153 queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); 174 INIT_LIST_HEAD(&to_free);
154 /* 175
155 * To prevent the ext4-dio-unwritten thread from keeping 176 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
156 * requeueing end_io requests and occupying cpu for too long, 177 dump_completed_IO(inode);
157 * yield the cpu if it sees an end_io request that has already 178 list_replace_init(&ei->i_completed_io_list, &unwritten);
158 * been requeued. 179 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
159 */ 180
160 if (was_queued) 181 while (!list_empty(&unwritten)) {
161 yield(); 182 io = list_entry(unwritten.next, ext4_io_end_t, list);
162 return; 183 BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
184 list_del_init(&io->list);
185
186 err = ext4_end_io(io);
187 if (unlikely(!ret && err))
188 ret = err;
189
190 list_add_tail(&io->list, &complete);
191 }
192 /* It is important to update all flags for all end_io in one shot w/o
193 * dropping the lock.*/
194 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
195 while (!list_empty(&complete)) {
196 io = list_entry(complete.next, ext4_io_end_t, list);
197 io->flag &= ~EXT4_IO_END_UNWRITTEN;
198 /* end_io context can not be destroyed now because it still
199 * used by queued worker. Worker thread will destroy it later */
200 if (io->flag & EXT4_IO_END_QUEUED)
201 list_del_init(&io->list);
202 else
203 list_move(&io->list, &to_free);
204 }
205 /* If we are called from worker context, it is time to clear queued
206 * flag, and destroy it's end_io if it was converted already */
207 if (work_io) {
208 work_io->flag &= ~EXT4_IO_END_QUEUED;
209 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
210 list_add_tail(&work_io->list, &to_free);
163 } 211 }
164 list_del_init(&io->list);
165 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 212 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
166 (void) ext4_end_io_nolock(io); 213
167 mutex_unlock(&inode->i_mutex); 214 while (!list_empty(&to_free)) {
168free: 215 io = list_entry(to_free.next, ext4_io_end_t, list);
169 ext4_free_io_end(io); 216 list_del_init(&io->list);
217 ext4_free_io_end(io);
218 }
219 return ret;
220}
221
222/*
223 * work on completed aio dio IO, to convert unwritten extents to extents
224 */
225static void ext4_end_io_work(struct work_struct *work)
226{
227 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
228 ext4_do_flush_completed_IO(io->inode, io);
229}
230
231int ext4_flush_completed_IO(struct inode *inode)
232{
233 return ext4_do_flush_completed_IO(inode, NULL);
170} 234}
171 235
172ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 236ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
@@ -199,9 +263,7 @@ static void buffer_io_error(struct buffer_head *bh)
199static void ext4_end_bio(struct bio *bio, int error) 263static void ext4_end_bio(struct bio *bio, int error)
200{ 264{
201 ext4_io_end_t *io_end = bio->bi_private; 265 ext4_io_end_t *io_end = bio->bi_private;
202 struct workqueue_struct *wq;
203 struct inode *inode; 266 struct inode *inode;
204 unsigned long flags;
205 int i; 267 int i;
206 sector_t bi_sector = bio->bi_sector; 268 sector_t bi_sector = bio->bi_sector;
207 269
@@ -259,14 +321,7 @@ static void ext4_end_bio(struct bio *bio, int error)
259 return; 321 return;
260 } 322 }
261 323
262 /* Add the io_end to per-inode completed io list*/ 324 ext4_add_complete_io(io_end);
263 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
264 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
265 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
266
267 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
268 /* queue the work to convert unwritten extents to written */
269 queue_work(wq, &io_end->work);
270} 325}
271 326
272void ext4_io_submit(struct ext4_io_submit *io) 327void ext4_io_submit(struct ext4_io_submit *io)