author	Christoph Hellwig <hch@infradead.org>	2013-09-04 09:04:39 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2013-09-04 09:23:46 -0400
commit	7b7a8665edd8db733980389b098530f9e4f630b2 (patch)
tree	968d570a9f0c4d861226aefed2f5f97a131c8d53 /fs
parent	4b6ccca701ef5977d0ffbc2c932430dea88b38b6 (diff)
direct-io: Implement generic deferred AIO completions
Add support to the core direct-io code to defer AIO completions to user
context using a workqueue.  This replaces opencoded and less efficient
code in XFS and ext4 (we save a memory allocation for each direct IO)
and will be needed to properly support O_(D)SYNC for AIO.

The communication between the filesystem and the direct I/O code requires
a new buffer head flag, which is a bit ugly but not avoidable until the
direct I/O code stops abusing the buffer_head structure for communicating
with the filesystems.

Currently this creates a per-superblock unbound workqueue for these
completions, which is taken from an earlier patch by Jan Kara.  I'm not
really convinced about this use and would prefer a "normal" global
workqueue with a high concurrency limit, but this needs further discussion.

JK: Fixed ext4 part, dynamic allocation of the workqueue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
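To illustrate the interface this introduces, below is a minimal sketch of a
filesystem opting in.  The myfs_* names are hypothetical; what the patch
actually adds are set_buffer_defer_completion() on the map buffer_head and
the reduced four-argument dio end_io signature.

	static int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create)
	{
		/* ... map iblock and fill in bh_result as usual ... */

		/*
		 * If completing this I/O needs process context (e.g. an
		 * unwritten extent conversion), ask the direct-io core to
		 * run the end_io callback from the per-sb "dio/%s"
		 * workqueue instead of from the bio completion (interrupt)
		 * path.
		 */
		if (create /* && the extent is unwritten */)
			set_buffer_defer_completion(bh_result);
		return 0;
	}

	/*
	 * The new callback signature: "ret" and "is_async" are gone, and
	 * the callback must no longer call aio_complete() or
	 * inode_dio_done() itself; dio_complete() does both after this
	 * returns.  With deferral this runs from a worker thread and is
	 * allowed to block.
	 */
	static void myfs_end_io_dio(struct kiocb *iocb, loff_t offset,
				    ssize_t size, void *private)
	{
		/* e.g. convert unwritten extents covering [offset, offset + size) */
	}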
Diffstat (limited to 'fs')
-rw-r--r--	fs/direct-io.c	85
-rw-r--r--	fs/ext4/ext4.h	11
-rw-r--r--	fs/ext4/inode.c	28
-rw-r--r--	fs/ext4/page-io.c	30
-rw-r--r--	fs/ext4/super.c	16
-rw-r--r--	fs/ocfs2/aops.c	8
-rw-r--r--	fs/super.c	18
-rw-r--r--	fs/xfs/xfs_aops.c	28
-rw-r--r--	fs/xfs/xfs_aops.h	3
9 files changed, 98 insertions(+), 129 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7ab90f5081ee..8b31b9f449f4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -127,6 +127,7 @@ struct dio {
 	spinlock_t bio_lock;		/* protects BIO fields below */
 	int page_errors;		/* errno from get_user_pages() */
 	int is_async;			/* is IO async ? */
+	bool defer_completion;		/* defer AIO completion to workqueue? */
 	int io_error;			/* IO error in completion path */
 	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
@@ -141,7 +142,10 @@ struct dio {
 	 * allocation time.  Don't add new fields after pages[] unless you
 	 * wish that they not be zeroed.
 	 */
-	struct page *pages[DIO_PAGES];	/* page buffer */
+	union {
+		struct page *pages[DIO_PAGES];	/* page buffer */
+		struct work_struct complete_work;/* deferred AIO completion */
+	};
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -221,16 +225,16 @@ static inline struct page *dio_get_page(struct dio *dio,
  * dio_complete() - called when all DIO BIO I/O has been completed
  * @offset: the byte offset in the file of the completed operation
  *
- * This releases locks as dictated by the locking type, lets interested parties
- * know that a DIO operation has completed, and calculates the resulting return
- * code for the operation.
+ * This drops i_dio_count, lets interested parties know that a DIO operation
+ * has completed, and calculates the resulting return code for the operation.
  *
  * It lets the filesystem know if it registered an interest earlier via
  * get_block.  Pass the private field of the map buffer_head so that
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
+		bool is_async)
 {
 	ssize_t transferred = 0;
 
@@ -258,19 +262,26 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
 	if (ret == 0)
 		ret = transferred;
 
-	if (dio->end_io && dio->result) {
-		dio->end_io(dio->iocb, offset, transferred,
-			    dio->private, ret, is_async);
-	} else {
-		inode_dio_done(dio->inode);
-		if (is_async)
-			aio_complete(dio->iocb, ret, 0);
-	}
+	if (dio->end_io && dio->result)
+		dio->end_io(dio->iocb, offset, transferred, dio->private);
+
+	inode_dio_done(dio->inode);
+	if (is_async)
+		aio_complete(dio->iocb, ret, 0);
 
+	kmem_cache_free(dio_cache, dio);
 	return ret;
 }
 
+static void dio_aio_complete_work(struct work_struct *work)
+{
+	struct dio *dio = container_of(work, struct dio, complete_work);
+
+	dio_complete(dio, dio->iocb->ki_pos, 0, true);
+}
+
 static int dio_bio_complete(struct dio *dio, struct bio *bio);
+
 /*
  * Asynchronous IO callback.
  */
@@ -290,8 +301,13 @@ static void dio_bio_end_aio(struct bio *bio, int error)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		dio_complete(dio, dio->iocb->ki_pos, 0, true);
-		kmem_cache_free(dio_cache, dio);
+		if (dio->result && dio->defer_completion) {
+			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
+			queue_work(dio->inode->i_sb->s_dio_done_wq,
+				   &dio->complete_work);
+		} else {
+			dio_complete(dio, dio->iocb->ki_pos, 0, true);
+		}
 	}
 }
 
@@ -511,6 +527,41 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 }
 
 /*
+ * Create workqueue for deferred direct IO completions. We allocate the
+ * workqueue when it's first needed. This avoids creating workqueue for
+ * filesystems that don't need it and also allows us to create the workqueue
+ * late enough so that we can include s_id in the name of the workqueue.
+ */
+static int sb_init_dio_done_wq(struct super_block *sb)
+{
+	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
+						      WQ_MEM_RECLAIM, 0,
+						      sb->s_id);
+	if (!wq)
+		return -ENOMEM;
+	/*
+	 * This has to be atomic as more DIOs can race to create the workqueue
+	 */
+	cmpxchg(&sb->s_dio_done_wq, NULL, wq);
+	/* Someone created workqueue before us? Free ours... */
+	if (wq != sb->s_dio_done_wq)
+		destroy_workqueue(wq);
+	return 0;
+}
+
+static int dio_set_defer_completion(struct dio *dio)
+{
+	struct super_block *sb = dio->inode->i_sb;
+
+	if (dio->defer_completion)
+		return 0;
+	dio->defer_completion = true;
+	if (!sb->s_dio_done_wq)
+		return sb_init_dio_done_wq(sb);
+	return 0;
+}
+
+/*
  * Call into the fs to map some more disk blocks.  We record the current number
  * of available blocks at sdio->blocks_available.  These are in units of the
  * fs blocksize, (1 << inode->i_blkbits).
@@ -581,6 +632,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 
 		/* Store for completion */
 		dio->private = map_bh->b_private;
+
+		if (ret == 0 && buffer_defer_completion(map_bh))
+			ret = dio_set_defer_completion(dio);
 	}
 	return ret;
 }
@@ -1269,7 +1323,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 	if (drop_refcount(dio) == 0) {
 		retval = dio_complete(dio, offset, retval, false);
-		kmem_cache_free(dio_cache, dio);
 	} else
 		BUG_ON(retval != -EIOCBQUEUED);
 
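A note on the workqueue creation above: multiple DIOs can race into
sb_init_dio_done_wq(), so it publishes the pointer with cmpxchg() and a
loser destroys its own copy.  The same lazy-singleton pattern in isolation,
as a sketch (struct foo and the alloc/free helpers are hypothetical):

	static int lazy_init_once(struct foo **slot)
	{
		struct foo *new = alloc_foo();	/* hypothetical allocator */

		if (!new)
			return -ENOMEM;
		/* Publish only if *slot is still NULL; else we lost the race. */
		if (cmpxchg(slot, NULL, new) != NULL)
			free_foo(new);	/* a racer already published; drop ours */
		return 0;
	}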
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ab26fbf3380..b247fbbed99c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -180,7 +180,6 @@ struct ext4_map_blocks {
  * Flags for ext4_io_end->flags
  */
 #define	EXT4_IO_END_UNWRITTEN	0x0001
-#define	EXT4_IO_END_DIRECT	0x0002
 
 /*
  * For converting uninitialized extents on a work queue. 'handle' is used for
@@ -196,8 +195,6 @@ typedef struct ext4_io_end {
 	unsigned int		flag;		/* unwritten or not */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
-	struct kiocb		*iocb;		/* iocb struct for AIO */
-	int			result;		/* error value for AIO */
 	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
@@ -900,11 +897,9 @@ struct ext4_inode_info {
 	 * Completed IOs that need unwritten extents handling and don't have
 	 * transaction reserved
 	 */
-	struct list_head i_unrsv_conversion_list;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten;	/* Nr. of inflight conversions pending */
 	struct work_struct i_rsv_conversion_work;
-	struct work_struct i_unrsv_conversion_work;
 
 	spinlock_t i_block_reservation_lock;
 
@@ -1276,8 +1271,6 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 	ext4_group_t s_flex_groups_allocated;
 
-	/* workqueue for unreserved extent convertions (dio) */
-	struct workqueue_struct *unrsv_conversion_wq;
 	/* workqueue for reserved extent conversions (buffered io) */
 	struct workqueue_struct *rsv_conversion_wq;
 
@@ -1340,9 +1333,6 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 					      struct ext4_io_end *io_end)
 {
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		/* Writeback has to have coversion transaction reserved */
-		WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
-			!(io_end->flag & EXT4_IO_END_DIRECT));
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
 		atomic_inc(&EXT4_I(inode)->i_unwritten);
 	}
@@ -2716,7 +2706,6 @@ extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
 extern void ext4_io_submit_init(struct ext4_io_submit *io,
 				struct writeback_control *wbc);
 extern void ext4_end_io_rsv_work(struct work_struct *work);
-extern void ext4_end_io_unrsv_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2ca04e67a4f..123bd81692d1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -727,8 +727,12 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
 	ret = ext4_map_blocks(handle, inode, &map, flags);
 	if (ret > 0) {
+		ext4_io_end_t *io_end = ext4_inode_aio(inode);
+
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
+			set_buffer_defer_completion(bh);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
@@ -2991,19 +2995,13 @@ static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private, int ret,
-			    bool is_async)
+			    ssize_t size, void *private)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ext4_io_end_t *io_end = iocb->private;
 
 	/* if not async direct IO just return */
-	if (!io_end) {
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
+	if (!io_end)
 		return;
-	}
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3013,11 +3011,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	iocb->private = NULL;
 	io_end->offset = offset;
 	io_end->size = size;
-	if (is_async) {
-		io_end->iocb = iocb;
-		io_end->result = ret;
-	}
-	ext4_put_io_end_defer(io_end);
+	ext4_put_io_end(io_end);
 }
 
 /*
@@ -3102,7 +3096,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 			ret = -ENOMEM;
 			goto retake_lock;
 		}
-		io_end->flag |= EXT4_IO_END_DIRECT;
 		/*
 		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
 		 */
@@ -3147,13 +3140,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
 		WARN_ON(iocb->private != io_end);
 		WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-		WARN_ON(io_end->iocb);
-		/*
-		 * Generic code already did inode_dio_done() so we
-		 * have to clear EXT4_IO_END_DIRECT to not do it for
-		 * the second time.
-		 */
-		io_end->flag = 0;
 		ext4_put_io_end(io_end);
 		iocb->private = NULL;
 	}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6625d210fb45..d7d0c7b46ed4 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -123,10 +123,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 		ext4_finish_bio(bio);
 		bio_put(bio);
 	}
-	if (io_end->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(io_end->inode);
-	if (io_end->iocb)
-		aio_complete(io_end->iocb, io_end->result, 0);
 	kmem_cache_free(io_end_cachep, io_end);
 }
 
@@ -204,19 +200,14 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
 	struct workqueue_struct *wq;
 	unsigned long flags;
 
-	BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+	/* Only reserved conversions from writeback should enter here */
+	WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+	WARN_ON(!io_end->handle);
 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	if (io_end->handle) {
-		wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
-		if (list_empty(&ei->i_rsv_conversion_list))
-			queue_work(wq, &ei->i_rsv_conversion_work);
-		list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
-	} else {
-		wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq;
-		if (list_empty(&ei->i_unrsv_conversion_list))
-			queue_work(wq, &ei->i_unrsv_conversion_work);
-		list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list);
-	}
+	wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
+	if (list_empty(&ei->i_rsv_conversion_list))
+		queue_work(wq, &ei->i_rsv_conversion_work);
+	list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 }
 
@@ -256,13 +247,6 @@ void ext4_end_io_rsv_work(struct work_struct *work)
 	ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
 }
 
-void ext4_end_io_unrsv_work(struct work_struct *work)
-{
-	struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
-						  i_unrsv_conversion_work);
-	ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list);
-}
-
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
 	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59373b625e9..5db4f0df8174 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -762,9 +762,7 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_unregister_li_request(sb);
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
-	flush_workqueue(sbi->unrsv_conversion_wq);
 	flush_workqueue(sbi->rsv_conversion_wq);
-	destroy_workqueue(sbi->unrsv_conversion_wq);
 	destroy_workqueue(sbi->rsv_conversion_wq);
 
 	if (sbi->s_journal) {
@@ -875,14 +873,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #endif
 	ei->jinode = NULL;
 	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
-	INIT_LIST_HEAD(&ei->i_unrsv_conversion_list);
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
 	atomic_set(&ei->i_ioend_count, 0);
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
-	INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work);
 
 	return &ei->vfs_inode;
 }
@@ -3954,14 +3950,6 @@ no_journal:
 		goto failed_mount4;
 	}
 
-	EXT4_SB(sb)->unrsv_conversion_wq =
-		alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-	if (!EXT4_SB(sb)->unrsv_conversion_wq) {
-		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
-		ret = -ENOMEM;
-		goto failed_mount4;
-	}
-
 	/*
 	 * The jbd2_journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
@@ -4115,8 +4103,6 @@ failed_mount4:
 	ext4_msg(sb, KERN_ERR, "mount failed");
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-	if (EXT4_SB(sb)->unrsv_conversion_wq)
-		destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
 failed_mount_wq:
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
@@ -4564,7 +4550,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(sbi->rsv_conversion_wq);
-	flush_workqueue(sbi->unrsv_conversion_wq);
 	/*
 	 * Writeback quota in non-journalled quota case - journalled quota has
 	 * no dirty dquots
@@ -4600,7 +4585,6 @@ static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-	flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
 	dquot_writeback_dquots(sb, -1);
 	if (wait && test_opt(sb, BARRIER))
 		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 2abf97b2a592..94417a85ce6e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -565,9 +565,7 @@ bail:
 static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     loff_t offset,
 			     ssize_t bytes,
-			     void *private,
-			     int ret,
-			     bool is_async)
+			     void *private)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	int level;
@@ -592,10 +590,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
 	ocfs2_rw_unlock(inode, level);
-
-	inode_dio_done(inode);
-	if (is_async)
-		aio_complete(iocb, ret, 0);
 }
 
 /*
diff --git a/fs/super.c b/fs/super.c
index 68307c029228..5536a95186e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -152,15 +152,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	static const struct super_operations default_op;
 
 	if (s) {
-		if (security_sb_alloc(s)) {
-			/*
-			 * We cannot call security_sb_free() without
-			 * security_sb_alloc() succeeding. So bail out manually
-			 */
-			kfree(s);
-			s = NULL;
-			goto out;
-		}
+		if (security_sb_alloc(s))
+			goto out_free_sb;
+
 #ifdef CONFIG_SMP
 		s->s_files = alloc_percpu(struct list_head);
 		if (!s->s_files)
@@ -228,6 +222,7 @@ err_out:
 		free_percpu(s->s_files);
 #endif
 	destroy_sb_writers(s);
+out_free_sb:
 	kfree(s);
 	s = NULL;
 	goto out;
@@ -414,6 +409,11 @@ void generic_shutdown_super(struct super_block *sb)
 
 	evict_inodes(sb);
 
+	if (sb->s_dio_done_wq) {
+		destroy_workqueue(sb->s_dio_done_wq);
+		sb->s_dio_done_wq = NULL;
+	}
+
 	if (sop->put_super)
 		sop->put_super(sb);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 596ec71da00e..e11d654af786 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,14 +86,6 @@ xfs_destroy_ioend(
 		bh->b_end_io(bh, !ioend->io_error);
 	}
 
-	if (ioend->io_iocb) {
-		inode_dio_done(ioend->io_inode);
-		if (ioend->io_isasync) {
-			aio_complete(ioend->io_iocb, ioend->io_error ?
-				     ioend->io_error : ioend->io_result, 0);
-		}
-	}
-
 	mempool_free(ioend, xfs_ioend_pool);
 }
 
@@ -281,7 +273,6 @@ xfs_alloc_ioend(
 	 * all the I/O from calling the completion routine too early.
 	 */
 	atomic_set(&ioend->io_remaining, 1);
-	ioend->io_isasync = 0;
 	ioend->io_isdirect = 0;
 	ioend->io_error = 0;
 	ioend->io_list = NULL;
@@ -291,8 +282,6 @@ xfs_alloc_ioend(
 	ioend->io_buffer_tail = NULL;
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
-	ioend->io_iocb = NULL;
-	ioend->io_result = 0;
 	ioend->io_append_trans = NULL;
 
 	INIT_WORK(&ioend->io_work, xfs_end_io);
@@ -1292,8 +1281,10 @@ __xfs_get_blocks(
 		if (create || !ISUNWRITTEN(&imap))
 			xfs_map_buffer(inode, bh_result, &imap, offset);
 		if (create && ISUNWRITTEN(&imap)) {
-			if (direct)
+			if (direct) {
 				bh_result->b_private = inode;
+				set_buffer_defer_completion(bh_result);
+			}
 			set_buffer_unwritten(bh_result);
 		}
 	}
@@ -1390,9 +1381,7 @@ xfs_end_io_direct_write(
 	struct kiocb		*iocb,
 	loff_t			offset,
 	ssize_t			size,
-	void			*private,
-	int			ret,
-	bool			is_async)
+	void			*private)
 {
 	struct xfs_ioend	*ioend = iocb->private;
 
@@ -1414,17 +1403,10 @@ xfs_end_io_direct_write(
 
 	ioend->io_offset = offset;
 	ioend->io_size = size;
-	ioend->io_iocb = iocb;
-	ioend->io_result = ret;
 	if (private && size > 0)
 		ioend->io_type = XFS_IO_UNWRITTEN;
 
-	if (is_async) {
-		ioend->io_isasync = 1;
-		xfs_finish_ioend(ioend);
-	} else {
-		xfs_finish_ioend_sync(ioend);
-	}
+	xfs_finish_ioend_sync(ioend);
 }
 
 STATIC ssize_t
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index c325abb8d61a..f94dd459dff9 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -45,7 +45,6 @@ typedef struct xfs_ioend {
 	unsigned int		io_type;	/* delalloc / unwritten */
 	int			io_error;	/* I/O error code */
 	atomic_t		io_remaining;	/* hold count */
-	unsigned int		io_isasync : 1;	/* needs aio_complete */
 	unsigned int		io_isdirect : 1;/* direct I/O */
 	struct inode		*io_inode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
@@ -54,8 +53,6 @@ typedef struct xfs_ioend {
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 	struct xfs_trans	*io_append_trans;/* xact. for size update */
-	struct kiocb		*io_iocb;
-	int			io_result;
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;