path: root/fs/xfs/xfs_buf.c
author      Dave Chinner <dchinner@redhat.com>    2011-09-30 00:45:02 -0400
committer   Alex Elder <aelder@sgi.com>           2011-10-11 22:15:08 -0400
commit      3815832a2aa4df9815d15dac05227e0c8551833f (patch)
tree        f92c98e6e73d3d8e6da7909b1ebd8e7cf4888df9 /fs/xfs/xfs_buf.c
parent      ddc3415aba1cb2f86d1fcad720cea834ee178f54 (diff)
xfs: Don't allocate new buffers on every call to _xfs_buf_find
Stats show that for an 8-way unlink workload running at ~80,000 unlinks/s we
are doing ~1 million cache hit lookups for only ~3000 buffer creates. That is
almost 3 orders of magnitude more cache hits than misses, so optimising for
cache hits is quite important. In the cache hit case, the new buffer we
allocate up front in case of a miss is never needed, so we are effectively
hitting the allocator for no good reason for the vast majority of calls to
_xfs_buf_find. 8-way create workloads show similar cache hit/miss ratios.

The result is profiles that look like this:

  samples  pcnt function                        DSO
  _______ _____ _______________________________ _________________

  1036.00 10.0% _xfs_buf_find                   [kernel.kallsyms]
   582.00  5.6% kmem_cache_alloc                [kernel.kallsyms]
   519.00  5.0% __memcpy                        [kernel.kallsyms]
   468.00  4.5% __ticket_spin_lock              [kernel.kallsyms]
   388.00  3.7% kmem_cache_free                 [kernel.kallsyms]
   331.00  3.2% xfs_log_commit_cil              [kernel.kallsyms]

Further, there is a fair bit of work involved in initialising a new buffer
once a cache miss has occurred, and we currently do that under the rbtree
spinlock. That increases spinlock hold time on what are heavily used trees.

To fix this, remove the initialisation of the buffer from _xfs_buf_find() and
only allocate the new buffer once we've had a cache miss. Initialise the
buffer immediately after allocating it in xfs_buf_get, too, so that it is
ready for insertion if we get another cache miss after the allocation. This
minimises lock hold time and avoids unnecessary allocator churn.

The resulting profiles look like:

  samples  pcnt function                    DSO
  _______ _____ ___________________________ _________________

  8111.00  9.1% _xfs_buf_find               [kernel.kallsyms]
  4380.00  4.9% __memcpy                    [kernel.kallsyms]
  4341.00  4.8% __ticket_spin_lock          [kernel.kallsyms]
  3401.00  3.8% kmem_cache_alloc            [kernel.kallsyms]
  2856.00  3.2% xfs_log_commit_cil          [kernel.kallsyms]
  2625.00  2.9% __kmalloc                   [kernel.kallsyms]
  2380.00  2.7% kfree                       [kernel.kallsyms]
  2016.00  2.3% kmem_cache_free             [kernel.kallsyms]

This shows a significant reduction in the time spent allocating from and
freeing to the slab caches (kmem_cache_alloc and kmem_cache_free).

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
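In outline, the reworked lookup path tries the cache first and only touches
the allocator on a miss; the new buffer is initialised before the rbtree lock
is taken, and the second _xfs_buf_find() call either inserts it or returns a
buffer that a racing thread inserted first, in which case the new one is
freed again. The following is a condensed, illustrative sketch of that shape,
not the patched code itself: the function name xfs_buf_get_sketch is
hypothetical, memory allocation, page mapping and error unwinding are
trimmed, and it assumes the surrounding xfs_buf.c definitions (see the full
diff below).

        /*
         * Condensed sketch of the reworked xfs_buf_get() flow. Details
         * (xfs_buf_allocate_memory, page mapping, error unwinding) are
         * omitted; the real code is in the diff below.
         */
        struct xfs_buf *
        xfs_buf_get_sketch(
                xfs_buftarg_t           *target,
                xfs_off_t               ioff,
                size_t                  isize,
                xfs_buf_flags_t         flags)
        {
                struct xfs_buf          *bp;
                struct xfs_buf          *new_bp;

                /* Fast path: a cache hit needs no allocation at all. */
                bp = _xfs_buf_find(target, ioff, isize, flags, NULL);
                if (likely(bp))
                        return bp;

                /* Cache miss: allocate and initialise outside the rbtree lock. */
                new_bp = xfs_buf_allocate(flags);
                if (unlikely(!new_bp))
                        return NULL;
                _xfs_buf_initialize(new_bp, target,
                                    ioff << BBSHIFT, isize << BBSHIFT, flags);

                /* Insert new_bp, or take the buffer a racing thread inserted first. */
                bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
                if (bp != new_bp)
                        xfs_buf_deallocate(new_bp);
                return bp;
        }

Initialising new_bp before the second lookup means the rbtree spinlock only
has to cover the link and recolour operations on insert, which is what
shrinks the lock hold time described above.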
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--  fs/xfs/xfs_buf.c  48
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e3af85095ddb..6785b7bd952d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -477,8 +477,6 @@ _xfs_buf_find(
 
         /* No match found */
         if (new_bp) {
-                _xfs_buf_initialize(new_bp, btp, range_base,
-                                        range_length, flags);
                 rb_link_node(&new_bp->b_rbnode, parent, rbp);
                 rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
                 /* the buffer keeps the perag reference until it is freed */
@@ -521,35 +519,53 @@ found:
 }
 
 /*
- * Assembles a buffer covering the specified range.
- * Storage in memory for all portions of the buffer will be allocated,
- * although backing storage may not be.
+ * Assembles a buffer covering the specified range. The code is optimised for
+ * cache hits, as metadata intensive workloads will see 3 orders of magnitude
+ * more hits than misses.
  */
-xfs_buf_t *
+struct xfs_buf *
 xfs_buf_get(
         xfs_buftarg_t           *target,/* target for buffer */
         xfs_off_t               ioff,   /* starting offset of range */
         size_t                  isize,  /* length of range */
         xfs_buf_flags_t         flags)
 {
-        xfs_buf_t               *bp, *new_bp;
+        struct xfs_buf          *bp;
+        struct xfs_buf          *new_bp;
         int                     error = 0;
 
+        bp = _xfs_buf_find(target, ioff, isize, flags, NULL);
+        if (likely(bp))
+                goto found;
+
         new_bp = xfs_buf_allocate(flags);
         if (unlikely(!new_bp))
                 return NULL;
 
+        _xfs_buf_initialize(new_bp, target,
+                            ioff << BBSHIFT, isize << BBSHIFT, flags);
+
         bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+        if (!bp) {
+                xfs_buf_deallocate(new_bp);
+                return NULL;
+        }
+
         if (bp == new_bp) {
                 error = xfs_buf_allocate_memory(bp, flags);
                 if (error)
                         goto no_buffer;
-        } else {
+        } else
                 xfs_buf_deallocate(new_bp);
-                if (unlikely(bp == NULL))
-                        return NULL;
-        }
 
+        /*
+         * Now we have a workable buffer, fill in the block number so
+         * that we can do IO on it.
+         */
+        bp->b_bn = ioff;
+        bp->b_count_desired = bp->b_buffer_length;
+
+found:
         if (!(bp->b_flags & XBF_MAPPED)) {
                 error = _xfs_buf_map_pages(bp, flags);
                 if (unlikely(error)) {
@@ -560,18 +576,10 @@ xfs_buf_get(
         }
 
         XFS_STATS_INC(xb_get);
-
-        /*
-         * Always fill in the block number now, the mapped cases can do
-         * their own overlay of this later.
-         */
-        bp->b_bn = ioff;
-        bp->b_count_desired = bp->b_buffer_length;
-
         trace_xfs_buf_get(bp, flags, _RET_IP_);
         return bp;
 
- no_buffer:
+no_buffer:
         if (flags & (XBF_LOCK | XBF_TRYLOCK))
                 xfs_buf_unlock(bp);
         xfs_buf_rele(bp);