author     Christoph Hellwig <hch@infradead.org>   2013-09-04 09:04:39 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>       2013-09-04 09:23:46 -0400
commit     7b7a8665edd8db733980389b098530f9e4f630b2 (patch)
tree       968d570a9f0c4d861226aefed2f5f97a131c8d53 /fs
parent     4b6ccca701ef5977d0ffbc2c932430dea88b38b6 (diff)
direct-io: Implement generic deferred AIO completions
Add support to the core direct-io code to defer AIO completions to user
context using a workqueue. This replaces open-coded and less efficient
code in XFS and ext4 (we save a memory allocation for each direct IO)
and will be needed to properly support O_(D)SYNC for AIO.
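
In outline: when the last bio of an async direct write completes, the core
code either finishes the I/O inline as before or, if the filesystem asked
for it, punts the final dio_complete() call to a workqueue so that ->end_io
runs in process context. Condensed from the dio_bio_end_aio() hunk in
fs/direct-io.c below:

	if (remaining == 0) {
		if (dio->result && dio->defer_completion) {
			/* run dio_complete() and ->end_io in user context */
			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
			queue_work(dio->inode->i_sb->s_dio_done_wq,
				   &dio->complete_work);
		} else {
			/* nothing transferred, or no deferral requested:
			 * complete directly from interrupt context */
			dio_complete(dio, dio->iocb->ki_pos, 0, true);
		}
	}

The struct dio reuses its pages[] array as a work_struct via a union, which
is why the deferral costs no extra memory allocation.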
The communication between the filesystem and the direct I/O code requires
a new buffer head flag, which is a bit ugly but not avoidable until the
direct I/O code stops abusing the buffer_head structure for communicating
with the filesystems.
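
Concretely, a filesystem opts in from its get_block callback by tagging the
buffer_head it returns. The flag helpers themselves
(set_buffer_defer_completion()/buffer_defer_completion(), presumably
generated by the usual BUFFER_FNS() machinery in include/linux/buffer_head.h)
fall outside this fs/-limited diffstat. A minimal sketch, with
example_get_block and the unwritten-extent test as placeholders (compare the
__xfs_get_blocks() and _ext4_get_block() hunks below):

	static int example_get_block(struct inode *inode, sector_t iblock,
				     struct buffer_head *bh_result, int create)
	{
		/* ... map iblock and fill in bh_result as usual ... */
		if (create && writing_into_unwritten_extent) {	/* placeholder */
			bh_result->b_private = inode;	/* state for ->end_io */
			/* ask the dio core to defer ->end_io to the workqueue */
			set_buffer_defer_completion(bh_result);
		}
		return 0;
	}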
Currently this creates a per-superblock unbound workqueue for these
completions, which is taken from an earlier patch by Jan Kara. I'm
not really convinced about this use and would prefer a "normal" global
workqueue with a high concurrency limit, but this needs further discussion.
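
The visible interface change for filesystems is the direct-io ->end_io
prototype: the ret and is_async arguments are gone, because the generic code
now calls inode_dio_done() and aio_complete() itself once ->end_io returns
(or once the deferred work item has run). Roughly, as the ext4, ocfs2 and
XFS conversions below reflect:

	/* before: the fs had to drop i_dio_count and complete the aio */
	void (*end_io)(struct kiocb *iocb, loff_t offset, ssize_t size,
		       void *private, int ret, bool is_async);

	/* after: only fs-side work (e.g. unwritten extent conversion) */
	void (*end_io)(struct kiocb *iocb, loff_t offset, ssize_t size,
		       void *private);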
JK: Fixed ext4 part, dynamic allocation of the workqueue.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
 fs/direct-io.c    |  85
 fs/ext4/ext4.h    |  11
 fs/ext4/inode.c   |  28
 fs/ext4/page-io.c |  30
 fs/ext4/super.c   |  16
 fs/ocfs2/aops.c   |   8
 fs/super.c        |  18
 fs/xfs/xfs_aops.c |  28
 fs/xfs/xfs_aops.h |   3
 9 files changed, 98 insertions, 129 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7ab90f5081ee..8b31b9f449f4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -127,6 +127,7 @@ struct dio {
 	spinlock_t bio_lock;		/* protects BIO fields below */
 	int page_errors;		/* errno from get_user_pages() */
 	int is_async;			/* is IO async ? */
+	bool defer_completion;		/* defer AIO completion to workqueue? */
 	int io_error;			/* IO error in completion path */
 	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
@@ -141,7 +142,10 @@ struct dio {
 	 * allocation time.  Don't add new fields after pages[] unless you
 	 * wish that they not be zeroed.
 	 */
-	struct page *pages[DIO_PAGES];	/* page buffer */
+	union {
+		struct page *pages[DIO_PAGES];	/* page buffer */
+		struct work_struct complete_work;/* deferred AIO completion */
+	};
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -221,16 +225,16 @@ static inline struct page *dio_get_page(struct dio *dio,
  * dio_complete() - called when all DIO BIO I/O has been completed
  * @offset: the byte offset in the file of the completed operation
  *
- * This releases locks as dictated by the locking type, lets interested parties
- * know that a DIO operation has completed, and calculates the resulting return
- * code for the operation.
+ * This drops i_dio_count, lets interested parties know that a DIO operation
+ * has completed, and calculates the resulting return code for the operation.
  *
  * It lets the filesystem know if it registered an interest earlier via
  * get_block.  Pass the private field of the map buffer_head so that
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
+		bool is_async)
 {
 	ssize_t transferred = 0;
 
@@ -258,19 +262,26 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
 	if (ret == 0)
 		ret = transferred;
 
-	if (dio->end_io && dio->result) {
-		dio->end_io(dio->iocb, offset, transferred,
-			    dio->private, ret, is_async);
-	} else {
-		inode_dio_done(dio->inode);
-		if (is_async)
-			aio_complete(dio->iocb, ret, 0);
-	}
+	if (dio->end_io && dio->result)
+		dio->end_io(dio->iocb, offset, transferred, dio->private);
+
+	inode_dio_done(dio->inode);
+	if (is_async)
+		aio_complete(dio->iocb, ret, 0);
 
+	kmem_cache_free(dio_cache, dio);
 	return ret;
 }
 
+static void dio_aio_complete_work(struct work_struct *work)
+{
+	struct dio *dio = container_of(work, struct dio, complete_work);
+
+	dio_complete(dio, dio->iocb->ki_pos, 0, true);
+}
+
 static int dio_bio_complete(struct dio *dio, struct bio *bio);
+
 /*
  * Asynchronous IO callback.
  */
@@ -290,8 +301,13 @@ static void dio_bio_end_aio(struct bio *bio, int error)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		dio_complete(dio, dio->iocb->ki_pos, 0, true);
-		kmem_cache_free(dio_cache, dio);
+		if (dio->result && dio->defer_completion) {
+			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
+			queue_work(dio->inode->i_sb->s_dio_done_wq,
+				   &dio->complete_work);
+		} else {
+			dio_complete(dio, dio->iocb->ki_pos, 0, true);
+		}
 	}
 }
 
@@ -511,6 +527,41 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 }
 
 /*
+ * Create workqueue for deferred direct IO completions. We allocate the
+ * workqueue when it's first needed. This avoids creating workqueue for
+ * filesystems that don't need it and also allows us to create the workqueue
+ * late enough so the we can include s_id in the name of the workqueue.
+ */
+static int sb_init_dio_done_wq(struct super_block *sb)
+{
+	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
+						      WQ_MEM_RECLAIM, 0,
+						      sb->s_id);
+	if (!wq)
+		return -ENOMEM;
+	/*
+	 * This has to be atomic as more DIOs can race to create the workqueue
+	 */
+	cmpxchg(&sb->s_dio_done_wq, NULL, wq);
+	/* Someone created workqueue before us? Free ours... */
+	if (wq != sb->s_dio_done_wq)
+		destroy_workqueue(wq);
+	return 0;
+}
+
+static int dio_set_defer_completion(struct dio *dio)
+{
+	struct super_block *sb = dio->inode->i_sb;
+
+	if (dio->defer_completion)
+		return 0;
+	dio->defer_completion = true;
+	if (!sb->s_dio_done_wq)
+		return sb_init_dio_done_wq(sb);
+	return 0;
+}
+
+/*
  * Call into the fs to map some more disk blocks.  We record the current number
  * of available blocks at sdio->blocks_available.  These are in units of the
  * fs blocksize, (1 << inode->i_blkbits).
@@ -581,6 +632,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 
 		/* Store for completion */
 		dio->private = map_bh->b_private;
+
+		if (ret == 0 && buffer_defer_completion(map_bh))
+			ret = dio_set_defer_completion(dio);
 	}
 	return ret;
 }
@@ -1269,7 +1323,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 	if (drop_refcount(dio) == 0) {
 		retval = dio_complete(dio, offset, retval, false);
-		kmem_cache_free(dio_cache, dio);
 	} else
 		BUG_ON(retval != -EIOCBQUEUED);
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ab26fbf3380..b247fbbed99c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -180,7 +180,6 @@ struct ext4_map_blocks {
  * Flags for ext4_io_end->flags
  */
 #define	EXT4_IO_END_UNWRITTEN	0x0001
-#define EXT4_IO_END_DIRECT	0x0002
 
 /*
  * For converting uninitialized extents on a work queue. 'handle' is used for
@@ -196,8 +195,6 @@ typedef struct ext4_io_end {
 	unsigned int	flag;		/* unwritten or not */
 	loff_t		offset;		/* offset in the file */
 	ssize_t		size;		/* size of the extent */
-	struct kiocb	*iocb;		/* iocb struct for AIO */
-	int		result;		/* error value for AIO */
 	atomic_t	count;		/* reference counter */
 } ext4_io_end_t;
 
@@ -900,11 +897,9 @@ struct ext4_inode_info {
 	 * Completed IOs that need unwritten extents handling and don't have
 	 * transaction reserved
 	 */
-	struct list_head i_unrsv_conversion_list;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten;	/* Nr. of inflight conversions pending */
 	struct work_struct i_rsv_conversion_work;
-	struct work_struct i_unrsv_conversion_work;
 
 	spinlock_t i_block_reservation_lock;
 
@@ -1276,8 +1271,6 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 	ext4_group_t s_flex_groups_allocated;
 
-	/* workqueue for unreserved extent convertions (dio) */
-	struct workqueue_struct *unrsv_conversion_wq;
 	/* workqueue for reserved extent conversions (buffered io) */
 	struct workqueue_struct *rsv_conversion_wq;
 
@@ -1340,9 +1333,6 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 					       struct ext4_io_end *io_end)
 {
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		/* Writeback has to have coversion transaction reserved */
-		WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
-			!(io_end->flag & EXT4_IO_END_DIRECT));
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
 		atomic_inc(&EXT4_I(inode)->i_unwritten);
 	}
@@ -2716,7 +2706,6 @@ extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
 extern void ext4_io_submit_init(struct ext4_io_submit *io,
 				struct writeback_control *wbc);
 extern void ext4_end_io_rsv_work(struct work_struct *work);
-extern void ext4_end_io_unrsv_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2ca04e67a4f..123bd81692d1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -727,8 +727,12 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
 	ret = ext4_map_blocks(handle, inode, &map, flags);
 	if (ret > 0) {
+		ext4_io_end_t *io_end = ext4_inode_aio(inode);
+
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
+			set_buffer_defer_completion(bh);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
@@ -2991,19 +2995,13 @@ static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private, int ret,
-			    bool is_async)
+			    ssize_t size, void *private)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ext4_io_end_t *io_end = iocb->private;
 
 	/* if not async direct IO just return */
-	if (!io_end) {
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
+	if (!io_end)
 		return;
-	}
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3013,11 +3011,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	iocb->private = NULL;
 	io_end->offset = offset;
 	io_end->size = size;
-	if (is_async) {
-		io_end->iocb = iocb;
-		io_end->result = ret;
-	}
-	ext4_put_io_end_defer(io_end);
+	ext4_put_io_end(io_end);
 }
 
 /*
@@ -3102,7 +3096,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 			ret = -ENOMEM;
 			goto retake_lock;
 		}
-		io_end->flag |= EXT4_IO_END_DIRECT;
 		/*
 		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
 		 */
@@ -3147,13 +3140,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
 		WARN_ON(iocb->private != io_end);
 		WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-		WARN_ON(io_end->iocb);
-		/*
-		 * Generic code already did inode_dio_done() so we
-		 * have to clear EXT4_IO_END_DIRECT to not do it for
-		 * the second time.
-		 */
-		io_end->flag = 0;
 		ext4_put_io_end(io_end);
 		iocb->private = NULL;
 	}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6625d210fb45..d7d0c7b46ed4 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -123,10 +123,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 		ext4_finish_bio(bio);
 		bio_put(bio);
 	}
-	if (io_end->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(io_end->inode);
-	if (io_end->iocb)
-		aio_complete(io_end->iocb, io_end->result, 0);
 	kmem_cache_free(io_end_cachep, io_end);
 }
 
@@ -204,19 +200,14 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
 	struct workqueue_struct *wq;
 	unsigned long flags;
 
-	BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+	/* Only reserved conversions from writeback should enter here */
+	WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+	WARN_ON(!io_end->handle);
 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	if (io_end->handle) {
-		wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
-		if (list_empty(&ei->i_rsv_conversion_list))
-			queue_work(wq, &ei->i_rsv_conversion_work);
-		list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
-	} else {
-		wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq;
-		if (list_empty(&ei->i_unrsv_conversion_list))
-			queue_work(wq, &ei->i_unrsv_conversion_work);
-		list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list);
-	}
+	wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
+	if (list_empty(&ei->i_rsv_conversion_list))
+		queue_work(wq, &ei->i_rsv_conversion_work);
+	list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 }
 
@@ -256,13 +247,6 @@ void ext4_end_io_rsv_work(struct work_struct *work)
 	ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
 }
 
-void ext4_end_io_unrsv_work(struct work_struct *work)
-{
-	struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
-						  i_unrsv_conversion_work);
-	ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list);
-}
-
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
 	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59373b625e9..5db4f0df8174 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -762,9 +762,7 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_unregister_li_request(sb);
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
-	flush_workqueue(sbi->unrsv_conversion_wq);
 	flush_workqueue(sbi->rsv_conversion_wq);
-	destroy_workqueue(sbi->unrsv_conversion_wq);
 	destroy_workqueue(sbi->rsv_conversion_wq);
 
 	if (sbi->s_journal) {
@@ -875,14 +873,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #endif
 	ei->jinode = NULL;
 	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
-	INIT_LIST_HEAD(&ei->i_unrsv_conversion_list);
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
 	atomic_set(&ei->i_ioend_count, 0);
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
-	INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work);
 
 	return &ei->vfs_inode;
 }
@@ -3954,14 +3950,6 @@ no_journal:
 		goto failed_mount4;
 	}
 
-	EXT4_SB(sb)->unrsv_conversion_wq =
-		alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-	if (!EXT4_SB(sb)->unrsv_conversion_wq) {
-		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
-		ret = -ENOMEM;
-		goto failed_mount4;
-	}
-
 	/*
 	 * The jbd2_journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
@@ -4115,8 +4103,6 @@ failed_mount4:
 	ext4_msg(sb, KERN_ERR, "mount failed");
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-	if (EXT4_SB(sb)->unrsv_conversion_wq)
-		destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
 failed_mount_wq:
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
@@ -4564,7 +4550,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(sbi->rsv_conversion_wq);
-	flush_workqueue(sbi->unrsv_conversion_wq);
 	/*
 	 * Writeback quota in non-journalled quota case - journalled quota has
 	 * no dirty dquots
@@ -4600,7 +4585,6 @@ static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-	flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
 	dquot_writeback_dquots(sb, -1);
 	if (wait && test_opt(sb, BARRIER))
 		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 2abf97b2a592..94417a85ce6e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -565,9 +565,7 @@ bail:
 static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     loff_t offset,
 			     ssize_t bytes,
-			     void *private,
-			     int ret,
-			     bool is_async)
+			     void *private)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	int level;
@@ -592,10 +590,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
 	ocfs2_rw_unlock(inode, level);
-
-	inode_dio_done(inode);
-	if (is_async)
-		aio_complete(iocb, ret, 0);
 }
 
 /*
diff --git a/fs/super.c b/fs/super.c
index 68307c029228..5536a95186e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -152,15 +152,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	static const struct super_operations default_op;
 
 	if (s) {
-		if (security_sb_alloc(s)) {
-			/*
-			 * We cannot call security_sb_free() without
-			 * security_sb_alloc() succeeding. So bail out manually
-			 */
-			kfree(s);
-			s = NULL;
-			goto out;
-		}
+		if (security_sb_alloc(s))
+			goto out_free_sb;
+
 #ifdef CONFIG_SMP
 		s->s_files = alloc_percpu(struct list_head);
 		if (!s->s_files)
@@ -228,6 +222,7 @@ err_out:
 		free_percpu(s->s_files);
 #endif
 	destroy_sb_writers(s);
+out_free_sb:
 	kfree(s);
 	s = NULL;
 	goto out;
@@ -414,6 +409,11 @@ void generic_shutdown_super(struct super_block *sb)
 
 		evict_inodes(sb);
 
+		if (sb->s_dio_done_wq) {
+			destroy_workqueue(sb->s_dio_done_wq);
+			sb->s_dio_done_wq = NULL;
+		}
+
 		if (sop->put_super)
 			sop->put_super(sb);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 596ec71da00e..e11d654af786 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,14 +86,6 @@ xfs_destroy_ioend(
 			bh->b_end_io(bh, !ioend->io_error);
 	}
 
-	if (ioend->io_iocb) {
-		inode_dio_done(ioend->io_inode);
-		if (ioend->io_isasync) {
-			aio_complete(ioend->io_iocb, ioend->io_error ?
-					ioend->io_error : ioend->io_result, 0);
-		}
-	}
-
 	mempool_free(ioend, xfs_ioend_pool);
 }
 
@@ -281,7 +273,6 @@ xfs_alloc_ioend(
 	 * all the I/O from calling the completion routine too early.
 	 */
 	atomic_set(&ioend->io_remaining, 1);
-	ioend->io_isasync = 0;
 	ioend->io_isdirect = 0;
 	ioend->io_error = 0;
 	ioend->io_list = NULL;
@@ -291,8 +282,6 @@ xfs_alloc_ioend(
 	ioend->io_buffer_tail = NULL;
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
-	ioend->io_iocb = NULL;
-	ioend->io_result = 0;
 	ioend->io_append_trans = NULL;
 
 	INIT_WORK(&ioend->io_work, xfs_end_io);
@@ -1292,8 +1281,10 @@ __xfs_get_blocks(
 		if (create || !ISUNWRITTEN(&imap))
 			xfs_map_buffer(inode, bh_result, &imap, offset);
 		if (create && ISUNWRITTEN(&imap)) {
-			if (direct)
+			if (direct) {
 				bh_result->b_private = inode;
+				set_buffer_defer_completion(bh_result);
+			}
 			set_buffer_unwritten(bh_result);
 		}
 	}
@@ -1390,9 +1381,7 @@ xfs_end_io_direct_write(
 	struct kiocb		*iocb,
 	loff_t			offset,
 	ssize_t			size,
-	void			*private,
-	int			ret,
-	bool			is_async)
+	void			*private)
 {
 	struct xfs_ioend	*ioend = iocb->private;
 
@@ -1414,17 +1403,10 @@ xfs_end_io_direct_write(
 
 	ioend->io_offset = offset;
 	ioend->io_size = size;
-	ioend->io_iocb = iocb;
-	ioend->io_result = ret;
 	if (private && size > 0)
 		ioend->io_type = XFS_IO_UNWRITTEN;
 
-	if (is_async) {
-		ioend->io_isasync = 1;
-		xfs_finish_ioend(ioend);
-	} else {
-		xfs_finish_ioend_sync(ioend);
-	}
+	xfs_finish_ioend_sync(ioend);
 }
 
 STATIC ssize_t
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index c325abb8d61a..f94dd459dff9 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -45,7 +45,6 @@ typedef struct xfs_ioend {
 	unsigned int		io_type;	/* delalloc / unwritten */
 	int			io_error;	/* I/O error code */
 	atomic_t		io_remaining;	/* hold count */
-	unsigned int		io_isasync : 1;	/* needs aio_complete */
 	unsigned int		io_isdirect : 1;/* direct I/O */
 	struct inode		*io_inode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
@@ -54,8 +53,6 @@ typedef struct xfs_ioend {
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 	struct xfs_trans	*io_append_trans;/* xact. for size update */
-	struct kiocb		*io_iocb;
-	int			io_result;
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;