author	Christoph Hellwig <hch@lst.de>	2016-04-05 18:34:30 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-04-05 18:34:30 -0400
commit	0e51a8e191dbd9b9c7b7bb0a1c28d57cd2be8e6a (patch)
tree	15c9f6f5bcdcbda2978889057780ec3d6b4c9cbe /fs/xfs
parent	37992c18bba3f578860c6448b7bae18a14e535d3 (diff)
xfs: optimize bio handling in the buffer writeback path
This patch implements two closely related changes: First it embeds a bio
into the ioend structure so that we don't have to allocate one separately.
Second it uses the block layer bio chaining mechanism to chain additional
bios off this first one if needed, instead of manually accounting for
multiple bio completions in the ioend structure. Together this removes a
memory allocation per ioend and greatly simplifies the ioend setup and
I/O completion path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
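
For reference, the first change relies on a standard block layer pattern:
embed the struct bio at the end of a containing structure, create a bio_set
whose front_pad covers the rest of the container, and recover the container
from the bio with container_of(). Below is a minimal sketch of that pattern,
assuming the 4.6-era block layer API; my_ioend, my_bioset and my_alloc_ioend
are made-up names, not part of this commit:

#include <linux/bio.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel.h>

/* Completion structure with the bio embedded at the end. */
struct my_ioend {
	struct inode	*io_inode;
	size_t		io_size;
	struct bio	io_inline_bio;	/* must be last: bio_vecs follow the bio */
};

static struct bio_set *my_bioset;

static int __init my_init(void)
{
	/*
	 * Ask the bioset to pad each allocation with enough bytes in
	 * front of the bio to hold the rest of struct my_ioend.
	 */
	my_bioset = bioset_create(64, offsetof(struct my_ioend, io_inline_bio));
	return my_bioset ? 0 : -ENOMEM;
}

static struct my_ioend *my_alloc_ioend(void)
{
	/* Mempool-backed, so this can block but will not fail. */
	struct bio *bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, my_bioset);

	/* One allocation, two views: step back from the bio to the container. */
	return container_of(bio, struct my_ioend, io_inline_bio);
}

Teardown then runs through the final bio_put() on the embedded bio rather
than a separate mempool_free(), which is why the new xfs_destroy_ioend()
below only drops bio references.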
Diffstat (limited to 'fs/xfs')
-rw-r--r--	fs/xfs/xfs_aops.c	247
-rw-r--r--	fs/xfs/xfs_aops.h	15
-rw-r--r--	fs/xfs/xfs_super.c	26
3 files changed, 123 insertions(+), 165 deletions(-)
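
The second change leans on bio_chain() to let one ioend span several bios
without the old io_remaining reference counting. The following sketch shows
the generic shape of what xfs_chain_bio() in the diff does, using a made-up
helper name and the 4.6-era submit_bio(rw, bio) signature:

#include <linux/bio.h>
#include <linux/fs.h>

/* Hypothetical helper: 'bio' is full, hand it off, keep filling 'new'. */
static struct bio *my_chain_and_submit(struct bio *bio)
{
	/* Later bios need no front padding, so plain bio_alloc() is fine. */
	struct bio *new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);

	/*
	 * bio_chain() bumps the remaining count of 'new' and points
	 * bio->bi_private at it, so 'new' cannot complete before 'bio'
	 * does, and the chain can later be walked through bi_private.
	 */
	bio_chain(bio, new);
	bio_get(bio);		/* keep 'bio' alive for that later walk */
	submit_bio(WRITE, bio);
	return new;
}

Only the last bio in the chain gets the xfs_end_bio completion handler and
the ioend in bi_private, so completion fires exactly once, after every
chained bio has finished.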
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9d9a01b50078..b5f1c66bbb58 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -124,18 +124,25 @@ next_bh:
  */
 STATIC void
 xfs_destroy_ioend(
-	struct xfs_ioend	*ioend)
+	struct xfs_ioend	*ioend,
+	int			error)
 {
 	struct inode		*inode = ioend->io_inode;
-	int			error = ioend->io_error;
+	struct bio		*last = ioend->io_bio;
 	struct bio		*bio, *next;
 
-	for (bio = ioend->io_bio_done; bio; bio = next) {
+	for (bio = &ioend->io_inline_bio; bio; bio = next) {
 		struct bio_vec	*bvec;
 		int		i;
 
-		next = bio->bi_private;
-		bio->bi_private = NULL;
+		/*
+		 * For the last bio, bi_private points to the ioend, so we
+		 * need to explicitly end the iteration here.
+		 */
+		if (bio == last)
+			next = NULL;
+		else
+			next = bio->bi_private;
 
 		/* walk each page on bio, ending page IO on them */
 		bio_for_each_segment_all(bvec, bio, i)
@@ -143,8 +150,6 @@ xfs_destroy_ioend(
 
 		bio_put(bio);
 	}
-
-	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
@@ -218,7 +223,8 @@ xfs_setfilesize(
 
 STATIC int
 xfs_setfilesize_ioend(
-	struct xfs_ioend	*ioend)
+	struct xfs_ioend	*ioend,
+	int			error)
 {
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	struct xfs_trans	*tp = ioend->io_append_trans;
@@ -232,53 +238,32 @@ xfs_setfilesize_ioend(
 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
 	/* we abort the update if there was an IO error */
-	if (ioend->io_error) {
+	if (error) {
 		xfs_trans_cancel(tp);
-		return ioend->io_error;
+		return error;
 	}
 
 	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
- * Schedule IO completion handling on the final put of an ioend.
- *
- * If there is no work to do we might as well call it a day and free the
- * ioend right now.
- */
-STATIC void
-xfs_finish_ioend(
-	struct xfs_ioend	*ioend)
-{
-	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
-
-		if (ioend->io_type == XFS_IO_UNWRITTEN)
-			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
-		else if (ioend->io_append_trans)
-			queue_work(mp->m_data_workqueue, &ioend->io_work);
-		else
-			xfs_destroy_ioend(ioend);
-	}
-}
-
-/*
  * IO write completion.
  */
 STATIC void
 xfs_end_io(
 	struct work_struct *work)
 {
-	xfs_ioend_t		*ioend = container_of(work, xfs_ioend_t, io_work);
-	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
-	int			error = 0;
+	struct xfs_ioend	*ioend =
+		container_of(work, struct xfs_ioend, io_work);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
+	int			error = ioend->io_bio->bi_error;
 
 	/*
 	 * Set an error if the mount has shut down and proceed with end I/O
 	 * processing so it can perform whatever cleanups are necessary.
 	 */
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		ioend->io_error = -EIO;
+		error = -EIO;
 
 	/*
 	 * For unwritten extents we need to issue transactions to convert a
@@ -288,50 +273,33 @@ xfs_end_io(
 	 * on error.
 	 */
 	if (ioend->io_type == XFS_IO_UNWRITTEN) {
-		if (ioend->io_error)
+		if (error)
 			goto done;
 		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
 						  ioend->io_size);
 	} else if (ioend->io_append_trans) {
-		error = xfs_setfilesize_ioend(ioend);
+		error = xfs_setfilesize_ioend(ioend, error);
 	} else {
 		ASSERT(!xfs_ioend_is_append(ioend));
 	}
 
 done:
-	if (error)
-		ioend->io_error = error;
-	xfs_destroy_ioend(ioend);
+	xfs_destroy_ioend(ioend, error);
 }
 
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-	struct inode		*inode,
-	unsigned int		type)
+STATIC void
+xfs_end_bio(
	struct bio		*bio)
 {
-	xfs_ioend_t		*ioend;
-
-	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-	memset(ioend, 0, sizeof(*ioend));
+	struct xfs_ioend	*ioend = bio->bi_private;
+	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
 
-	/*
-	 * Set the count to 1 initially, which will prevent an I/O
-	 * completion callback from happening before we have started
-	 * all the I/O from calling the completion routine too early.
-	 */
-	atomic_set(&ioend->io_remaining, 1);
-	INIT_LIST_HEAD(&ioend->io_list);
-	ioend->io_type = type;
-	ioend->io_inode = inode;
-	INIT_WORK(&ioend->io_work, xfs_end_io);
-	spin_lock_init(&ioend->io_lock);
-	return ioend;
+	if (ioend->io_type == XFS_IO_UNWRITTEN)
+		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+	else if (ioend->io_append_trans)
+		queue_work(mp->m_data_workqueue, &ioend->io_work);
+	else
+		xfs_destroy_ioend(ioend, bio->bi_error);
 }
 
 STATIC int
@@ -403,56 +371,6 @@ xfs_imap_valid(
 		offset < imap->br_startoff + imap->br_blockcount;
 }
 
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-	struct bio		*bio)
-{
-	struct xfs_ioend	*ioend = bio->bi_private;
-	unsigned long		flags;
-
-	bio->bi_private = NULL;
-	bio->bi_end_io = NULL;
-
-	spin_lock_irqsave(&ioend->io_lock, flags);
-	if (!ioend->io_error)
-		ioend->io_error = bio->bi_error;
-	if (!ioend->io_bio_done)
-		ioend->io_bio_done = bio;
-	else
-		ioend->io_bio_done_tail->bi_private = bio;
-	ioend->io_bio_done_tail = bio;
-	spin_unlock_irqrestore(&ioend->io_lock, flags);
-
-	xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-	struct writeback_control *wbc,
-	xfs_ioend_t		*ioend,
-	struct bio		*bio)
-{
-	atomic_inc(&ioend->io_remaining);
-	bio->bi_private = ioend;
-	bio->bi_end_io = xfs_end_bio;
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-	struct buffer_head	*bh)
-{
-	struct bio		*bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
-
-	ASSERT(bio->bi_private == NULL);
-	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio->bi_bdev = bh->b_bdev;
-	return bio;
-}
-
 STATIC void
 xfs_start_buffer_writeback(
 	struct buffer_head	*bh)
@@ -513,16 +431,19 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 STATIC int
 xfs_submit_ioend(
 	struct writeback_control *wbc,
-	xfs_ioend_t		*ioend,
+	struct xfs_ioend	*ioend,
 	int			status)
 {
 	/* Reserve log space if we might write beyond the on-disk inode size. */
 	if (!status &&
-	    ioend->io_bio && ioend->io_type != XFS_IO_UNWRITTEN &&
+	    ioend->io_type != XFS_IO_UNWRITTEN &&
 	    xfs_ioend_is_append(ioend) &&
 	    !ioend->io_append_trans)
 		status = xfs_setfilesize_trans_alloc(ioend);
 
+	ioend->io_bio->bi_private = ioend;
+	ioend->io_bio->bi_end_io = xfs_end_bio;
+
 	/*
 	 * If we are failing the IO now, just mark the ioend with an
 	 * error and finish it. This will run IO completion immediately
@@ -530,19 +451,75 @@ xfs_submit_ioend(
 	 * time.
 	 */
 	if (status) {
-		if (ioend->io_bio)
-			bio_put(ioend->io_bio);
-		ioend->io_error = status;
-		xfs_finish_ioend(ioend);
+		ioend->io_bio->bi_error = status;
+		bio_endio(ioend->io_bio);
 		return status;
 	}
 
-	xfs_submit_ioend_bio(wbc, ioend, ioend->io_bio);
-	ioend->io_bio = NULL;
-	xfs_finish_ioend(ioend);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+			ioend->io_bio);
 	return 0;
 }
 
+static void
+xfs_init_bio_from_bh(
+	struct bio		*bio,
+	struct buffer_head	*bh)
+{
+	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+}
+
+static struct xfs_ioend *
+xfs_alloc_ioend(
+	struct inode		*inode,
+	unsigned int		type,
+	xfs_off_t		offset,
+	struct buffer_head	*bh)
+{
+	struct xfs_ioend	*ioend;
+	struct bio		*bio;
+
+	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+	xfs_init_bio_from_bh(bio, bh);
+
+	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
+	INIT_LIST_HEAD(&ioend->io_list);
+	ioend->io_type = type;
+	ioend->io_inode = inode;
+	ioend->io_size = 0;
+	ioend->io_offset = offset;
+	INIT_WORK(&ioend->io_work, xfs_end_io);
+	ioend->io_append_trans = NULL;
+	ioend->io_bio = bio;
+	return ioend;
+}
+
+/*
+ * Allocate a new bio, and chain the old bio to the new one.
+ *
+ * Note that we have to perform the chaining in this unintuitive order
+ * so that the bi_private linkage is set up in the right direction for the
+ * traversal in xfs_destroy_ioend().
+ */
+static void
+xfs_chain_bio(
+	struct xfs_ioend	*ioend,
+	struct writeback_control *wbc,
+	struct buffer_head	*bh)
+{
+	struct bio *new;
+
+	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+	xfs_init_bio_from_bh(new, bh);
+
+	bio_chain(ioend->io_bio, new);
+	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+			ioend->io_bio);
+	ioend->io_bio = new;
+}
+
 /*
  * Test to see if we've been building up a completion structure for
  * earlier buffers -- if so, we try to append to this ioend if we
@@ -564,19 +541,15 @@ xfs_add_to_ioend(
 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
-		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type);
-		wpc->ioend->io_offset = offset;
+		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
 	}
 
-retry:
-	if (!wpc->ioend->io_bio)
-		wpc->ioend->io_bio = xfs_alloc_ioend_bio(bh);
-
-	if (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) {
-		xfs_submit_ioend_bio(wbc, wpc->ioend, wpc->ioend->io_bio);
-		wpc->ioend->io_bio = NULL;
-		goto retry;
-	}
+	/*
+	 * If the buffer doesn't fit into the bio we need to allocate a new
+	 * one. This shouldn't happen more than once for a given buffer.
+	 */
+	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
+		xfs_chain_bio(wpc->ioend, wbc, bh);
 
 	wpc->ioend->io_size += bh->b_size;
 	wpc->last_block = bh->b_blocknr;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 61a3dc3dbdf8..814aab790713 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern mempool_t *xfs_ioend_pool;
+extern struct bio_set *xfs_ioend_bioset;
 
 /*
  * Types of I/O for bmap clustering and I/O completion tracking.
@@ -37,24 +37,19 @@ enum {
 	{ XFS_IO_OVERWRITE,		"overwrite" }
 
 /*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
+ * Structure for buffered I/O completions.
 */
-typedef struct xfs_ioend {
+struct xfs_ioend {
 	struct list_head	io_list;	/* next ioend in chain */
 	unsigned int		io_type;	/* delalloc / unwritten */
-	int			io_error;	/* I/O error code */
-	atomic_t		io_remaining;	/* hold count */
 	struct inode		*io_inode;	/* file being written to */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 	struct xfs_trans	*io_append_trans;/* xact. for size update */
 	struct bio		*io_bio;	/* bio being built */
-	struct bio		*io_bio_done;	/* bios completed */
-	struct bio		*io_bio_done_tail; /* bios completed */
-	spinlock_t		io_lock;	/* for bio completion list */
-} xfs_ioend_t;
+	struct bio		io_inline_bio;	/* MUST BE LAST! */
+};
 
 extern const struct address_space_operations xfs_address_space_operations;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d760934109b5..e52e9c1fd933 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -58,8 +58,7 @@
 #include <linux/parser.h>
 
 static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
+struct bio_set *xfs_ioend_bioset;
 
 static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
 #ifdef DEBUG
@@ -1688,20 +1687,15 @@ MODULE_ALIAS_FS("xfs");
 STATIC int __init
 xfs_init_zones(void)
 {
-
-	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-	if (!xfs_ioend_zone)
+	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+			offsetof(struct xfs_ioend, io_inline_bio));
+	if (!xfs_ioend_bioset)
 		goto out;
 
-	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-						  xfs_ioend_zone);
-	if (!xfs_ioend_pool)
-		goto out_destroy_ioend_zone;
-
 	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
 						"xfs_log_ticket");
 	if (!xfs_log_ticket_zone)
-		goto out_destroy_ioend_pool;
+		goto out_free_ioend_bioset;
 
 	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
 						"xfs_bmap_free_item");
@@ -1797,10 +1791,8 @@ xfs_init_zones(void)
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
  out_destroy_log_ticket_zone:
 	kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-	mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-	kmem_zone_destroy(xfs_ioend_zone);
+ out_free_ioend_bioset:
+	bioset_free(xfs_ioend_bioset);
  out:
 	return -ENOMEM;
 }
@@ -1826,9 +1818,7 @@ xfs_destroy_zones(void)
 	kmem_zone_destroy(xfs_btree_cur_zone);
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
 	kmem_zone_destroy(xfs_log_ticket_zone);
-	mempool_destroy(xfs_ioend_pool);
-	kmem_zone_destroy(xfs_ioend_zone);
-
+	bioset_free(xfs_ioend_bioset);
 }
 
 STATIC int __init