From 375ec69d2ef6e0797f19f5823e36e249765c3d41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:03 +0000 Subject: xfs: remove delwri buffer handling from xfs_buf_iorequest We cannot ever reach xfs_buf_iorequest for a buffer with XBF_DELWRI set, given that all write handlers make sure that the buffer is removed from the delwri queue before, and we never do reads with the XBF_DELWRI flag set (which the code would not handle correctly anyway). Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c57836dc778f..2e71a26da22e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1275,15 +1275,10 @@ xfs_buf_iorequest( { trace_xfs_buf_iorequest(bp, _RET_IP_); - if (bp->b_flags & XBF_DELWRI) { - xfs_buf_delwri_queue(bp, 1); - return 0; - } + ASSERT(!(bp->b_flags & XBF_DELWRI)); - if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_WRITE) xfs_buf_wait_unpin(bp); - } - xfs_buf_hold(bp); /* Set the count to 1 initially, this will stop an I/O -- cgit v1.2.2 From 527cfdf19dd538a5a9e46b9bed0f30a38c28438d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:04 +0000 Subject: xfs: remove the unlock argument to xfs_buf_delwri_queue We can just unlock the buffer in the caller, and the decrement of b_hold would also be needed in the !unlock case; we just never hit that case currently given that the caller handles it. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 2e71a26da22e..04689dbbcbba 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -43,7 +43,7 @@ static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); -STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); +STATIC void xfs_buf_delwri_queue(xfs_buf_t *); static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; @@ -940,7 +940,7 @@ xfs_buf_unlock( if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { atomic_inc(&bp->b_hold); bp->b_flags |= XBF_ASYNC; - xfs_buf_delwri_queue(bp, 0); + xfs_buf_delwri_queue(bp); } XB_CLEAR_OWNER(bp); @@ -1049,7 +1049,8 @@ xfs_bdwrite( bp->b_flags &= ~XBF_READ; bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - xfs_buf_delwri_queue(bp, 1); + xfs_buf_delwri_queue(bp); + xfs_buf_unlock(bp); } /* @@ -1562,8 +1563,7 @@ error: */ STATIC void xfs_buf_delwri_queue( - xfs_buf_t *bp, - int unlock) + xfs_buf_t *bp) { struct list_head *dwq = &bp->b_target->bt_delwrite_queue; spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; @@ -1576,8 +1576,7 @@ xfs_buf_delwri_queue( /* If already in the queue, dequeue and place at tail */ if (!list_empty(&bp->b_list)) { ASSERT(bp->b_flags & _XBF_DELWRI_Q); - if (unlock) - atomic_dec(&bp->b_hold); + atomic_dec(&bp->b_hold); list_del(&bp->b_list); } @@ -1590,9 +1589,6 @@ xfs_buf_delwri_queue( list_add_tail(&bp->b_list, dwq); bp->b_queuetime = jiffies; spin_unlock(dwlk); - - if (unlock) - xfs_buf_unlock(bp); } void -- cgit v1.2.2 From 5a8ee6bafdd0ab8555adceac8b2cec539a552a1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:05 +0000 Subject: xfs: move more delwri setup into xfs_buf_delwri_queue Do not transfer a reference held by the caller to the buffer on the list, or decrement it 
in xfs_buf_delwri_queue, but instead grab a new reference if needed, and let the caller drop its own reference. Also move setting of the XBF_DELWRI and XBF_ASYNC flags into xfs_buf_delwri_queue, and only do it if needed. Note that for now xfs_buf_unlock already has XBF_DELWRI, but that will change in the following patches. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 04689dbbcbba..86c0945053c9 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -937,11 +937,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { - if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { - atomic_inc(&bp->b_hold); - bp->b_flags |= XBF_ASYNC; + if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) xfs_buf_delwri_queue(bp); - } XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1046,11 +1043,8 @@ xfs_bdwrite( { trace_xfs_buf_bdwrite(bp, _RET_IP_); - bp->b_flags &= ~XBF_READ; - bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - xfs_buf_delwri_queue(bp); - xfs_buf_unlock(bp); + xfs_buf_relse(bp); } /* @@ -1570,23 +1564,22 @@ xfs_buf_delwri_queue( trace_xfs_buf_delwri_queue(bp, _RET_IP_); - ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); + ASSERT(!(bp->b_flags & XBF_READ)); spin_lock(dwlk); - /* If already in the queue, dequeue and place at tail */ if (!list_empty(&bp->b_list)) { + /* if already in the queue, move it to the tail */ ASSERT(bp->b_flags & _XBF_DELWRI_Q); - atomic_dec(&bp->b_hold); - list_del(&bp->b_list); - } - - if (list_empty(dwq)) { + list_move_tail(&bp->b_list, dwq); + } else { /* start xfsbufd as it is about to have something to do */ - wake_up_process(bp->b_target->bt_task); - } + if (list_empty(dwq)) + wake_up_process(bp->b_target->bt_task); - bp->b_flags |= _XBF_DELWRI_Q; - list_add_tail(&bp->b_list, dwq); + 
atomic_inc(&bp->b_hold); + bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC; + list_add_tail(&bp->b_list, dwq); + } bp->b_queuetime = jiffies; spin_unlock(dwlk); } -- cgit v1.2.2 From 61551f1ee536289084a4a8f1c4f187e2f371c440 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:06 +0000 Subject: xfs: call xfs_buf_delwri_queue directly Unify the ways we add buffers to the delwri queue by always calling xfs_buf_delwri_queue directly. The xfs_bdwrite function is removed and opencoded in its callers, and the two places setting XBF_DELWRI while a buffer is locked and expecting xfs_buf_unlock to pick it up are converted to call xfs_buf_delwri_queue directly, too. Also replace the XFS_BUF_UNDELAYWRITE macro with direct calls to xfs_buf_delwri_dequeue to make the explicit queuing/dequeuing more obvious. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 86c0945053c9..309eca75fad4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -43,7 +43,6 @@ static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); -STATIC void xfs_buf_delwri_queue(xfs_buf_t *); static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; @@ -937,9 +936,6 @@ void xfs_buf_unlock( struct xfs_buf *bp) { - if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) - xfs_buf_delwri_queue(bp); - XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1036,17 +1032,6 @@ xfs_bwrite( return error; } -void -xfs_bdwrite( - void *mp, - struct xfs_buf *bp) -{ - trace_xfs_buf_bdwrite(bp, _RET_IP_); - - xfs_buf_delwri_queue(bp); - xfs_buf_relse(bp); -} - /* * Called when we want to stop a buffer from getting written or read. 
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend @@ -1069,7 +1054,7 @@ xfs_bioerror( * We're calling xfs_buf_ioend, so delete XBF_DONE flag. */ XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); + xfs_buf_delwri_dequeue(bp); XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); @@ -1098,7 +1083,7 @@ xfs_bioerror_relse( * change that interface. */ XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); + xfs_buf_delwri_dequeue(bp); XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); bp->b_iodone = NULL; @@ -1555,7 +1540,7 @@ error: /* * Delayed write buffer handling */ -STATIC void +void xfs_buf_delwri_queue( xfs_buf_t *bp) { -- cgit v1.2.2 From c2b006c1da1602551def200e4661535f02b82488 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:07 +0000 Subject: xfs: let xfs_bwrite callers handle the xfs_buf_relse Remove the xfs_buf_relse from xfs_bwrite and let the caller handle it to mirror the delwri and read paths. Also remove the mount pointer passed to xfs_bwrite, which is superfluous now that we have a mount pointer in the buftarg. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 309eca75fad4..63dbeb9efc49 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1014,7 +1014,6 @@ xfs_buf_ioerror( int xfs_bwrite( - struct xfs_mount *mp, struct xfs_buf *bp) { int error; @@ -1026,9 +1025,10 @@ xfs_bwrite( xfs_bdstrat_cb(bp); error = xfs_buf_iowait(bp); - if (error) - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } return error; } -- cgit v1.2.2 From c4e1c098ee8a72ea563a697a2b175868be86fdc9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:08 +0000 Subject: xfs: use the "delwri" terminology consistently And also remove the strange local lock and delwri list pointers in a few functions. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 63dbeb9efc49..d3c2b58d7d70 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1489,12 +1489,12 @@ xfs_setsize_buftarg( } STATIC int -xfs_alloc_delwrite_queue( +xfs_alloc_delwri_queue( xfs_buftarg_t *btp, const char *fsname) { - INIT_LIST_HEAD(&btp->bt_delwrite_queue); - spin_lock_init(&btp->bt_delwrite_lock); + INIT_LIST_HEAD(&btp->bt_delwri_queue); + spin_lock_init(&btp->bt_delwri_lock); btp->bt_flags = 0; btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); if (IS_ERR(btp->bt_task)) @@ -1524,7 +1524,7 @@ xfs_alloc_buftarg( spin_lock_init(&btp->bt_lru_lock); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; - if (xfs_alloc_delwrite_queue(btp, fsname)) + if (xfs_alloc_delwri_queue(btp, fsname)) goto error; btp->bt_shrinker.shrink = xfs_buftarg_shrink; btp->bt_shrinker.seeks = DEFAULT_SEEKS; @@ -1544,46 +1544,44 @@ void xfs_buf_delwri_queue( xfs_buf_t *bp) { - struct list_head *dwq = &bp->b_target->bt_delwrite_queue; - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + struct xfs_buftarg *btp = bp->b_target; trace_xfs_buf_delwri_queue(bp, _RET_IP_); ASSERT(!(bp->b_flags & XBF_READ)); - spin_lock(dwlk); + spin_lock(&btp->bt_delwri_lock); if (!list_empty(&bp->b_list)) { /* if already in the queue, move it to the tail */ ASSERT(bp->b_flags & _XBF_DELWRI_Q); - list_move_tail(&bp->b_list, dwq); + list_move_tail(&bp->b_list, &btp->bt_delwri_queue); } else { /* start xfsbufd as it is about to have something to do */ - if (list_empty(dwq)) + if (list_empty(&btp->bt_delwri_queue)) wake_up_process(bp->b_target->bt_task); atomic_inc(&bp->b_hold); bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC; - list_add_tail(&bp->b_list, dwq); + list_add_tail(&bp->b_list, 
&btp->bt_delwri_queue); } bp->b_queuetime = jiffies; - spin_unlock(dwlk); + spin_unlock(&btp->bt_delwri_lock); } void xfs_buf_delwri_dequeue( xfs_buf_t *bp) { - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; int dequeued = 0; - spin_lock(dwlk); + spin_lock(&bp->b_target->bt_delwri_lock); if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { ASSERT(bp->b_flags & _XBF_DELWRI_Q); list_del_init(&bp->b_list); dequeued = 1; } bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); - spin_unlock(dwlk); + spin_unlock(&bp->b_target->bt_delwri_lock); if (dequeued) xfs_buf_rele(bp); @@ -1615,9 +1613,9 @@ xfs_buf_delwri_promote( if (bp->b_queuetime < jiffies - age) return; bp->b_queuetime = jiffies - age; - spin_lock(&btp->bt_delwrite_lock); - list_move(&bp->b_list, &btp->bt_delwrite_queue); - spin_unlock(&btp->bt_delwrite_lock); + spin_lock(&btp->bt_delwri_lock); + list_move(&bp->b_list, &btp->bt_delwri_queue); + spin_unlock(&btp->bt_delwri_lock); } STATIC void @@ -1638,15 +1636,13 @@ xfs_buf_delwri_split( unsigned long age) { xfs_buf_t *bp, *n; - struct list_head *dwq = &target->bt_delwrite_queue; - spinlock_t *dwlk = &target->bt_delwrite_lock; int skipped = 0; int force; force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); INIT_LIST_HEAD(list); - spin_lock(dwlk); - list_for_each_entry_safe(bp, n, dwq, b_list) { + spin_lock(&target->bt_delwri_lock); + list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) { ASSERT(bp->b_flags & XBF_DELWRI); if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { @@ -1663,10 +1659,9 @@ xfs_buf_delwri_split( } else skipped++; } - spin_unlock(dwlk); + spin_unlock(&target->bt_delwri_lock); return skipped; - } /* @@ -1716,7 +1711,7 @@ xfsbufd( } /* sleep for a long time if there is nothing to do. 
*/ - if (list_empty(&target->bt_delwrite_queue)) + if (list_empty(&target->bt_delwri_queue)) tout = MAX_SCHEDULE_TIMEOUT; schedule_timeout_interruptible(tout); -- cgit v1.2.2 From eabbaf118239d0d4188298b52751040f3b4cc28f Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Thu, 8 Sep 2011 20:18:50 +0000 Subject: xfs: Fix the incorrect comment in the header of _xfs_buf_find Fix the incorrect comment in the header of the function _xfs_buf_find(). Signed-off-by: Chandra Seetharaman Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d3c2b58d7d70..e3af85095ddb 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -415,10 +415,7 @@ _xfs_buf_map_pages( /* * Look up, and creates if absent, a lockable buffer for * a given range of an inode. The buffer is returned - * locked. If other overlapping buffers exist, they are - * released before the new buffer is created and locked, - * which may imply that this call will block until those buffers - * are unlocked. No I/O is implied by this call. + * locked. No I/O is implied by this call. */ xfs_buf_t * _xfs_buf_find( -- cgit v1.2.2 From 3815832a2aa4df9815d15dac05227e0c8551833f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 30 Sep 2011 04:45:02 +0000 Subject: xfs: Don't allocate new buffers on every call to _xfs_buf_find Stats show that for an 8-way unlink @ ~80,000 unlinks/s we are doing ~1 million cache hit lookups to ~3000 buffer creates. That's almost 3 orders of magnitude more cache hits than misses, so optimising for cache hits is quite important. In the cache hit case, we do not need to allocate a new buffer in case of a cache miss, so we are effectively hitting the allocator for no good reason for the vast majority of calls to _xfs_buf_find. 8-way create workloads are showing similar cache hit/miss ratios. 
The result is profiles that look like this: samples pcnt function DSO _______ _____ _______________________________ _________________ 1036.00 10.0% _xfs_buf_find [kernel.kallsyms] 582.00 5.6% kmem_cache_alloc [kernel.kallsyms] 519.00 5.0% __memcpy [kernel.kallsyms] 468.00 4.5% __ticket_spin_lock [kernel.kallsyms] 388.00 3.7% kmem_cache_free [kernel.kallsyms] 331.00 3.2% xfs_log_commit_cil [kernel.kallsyms] Further, there is a fair bit of work involved in initialising a new buffer once a cache miss has occurred and we currently do that under the rbtree spinlock. That increases spinlock hold time on what are heavily used trees. To fix this, remove the initialisation of the buffer from _xfs_buf_find() and only allocate the new buffer once we've had a cache miss. Initialise the buffer immediately after allocating it in xfs_buf_get, too, so that it is ready for insert if we get another cache miss after allocation. This minimises lock hold time and avoids unnecessary allocator churn. The resulting profiles look like: samples pcnt function DSO _______ _____ ___________________________ _________________ 8111.00 9.1% _xfs_buf_find [kernel.kallsyms] 4380.00 4.9% __memcpy [kernel.kallsyms] 4341.00 4.8% __ticket_spin_lock [kernel.kallsyms] 3401.00 3.8% kmem_cache_alloc [kernel.kallsyms] 2856.00 3.2% xfs_log_commit_cil [kernel.kallsyms] 2625.00 2.9% __kmalloc [kernel.kallsyms] 2380.00 2.7% kfree [kernel.kallsyms] 2016.00 2.3% kmem_cache_free [kernel.kallsyms] Showing a significant reduction in time spent doing allocation and freeing from slabs (kmem_cache_alloc and kmem_cache_free). 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e3af85095ddb..6785b7bd952d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -477,8 +477,6 @@ _xfs_buf_find( /* No match found */ if (new_bp) { - _xfs_buf_initialize(new_bp, btp, range_base, - range_length, flags); rb_link_node(&new_bp->b_rbnode, parent, rbp); rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); /* the buffer keeps the perag reference until it is freed */ @@ -521,35 +519,53 @@ found: } /* - * Assembles a buffer covering the specified range. - * Storage in memory for all portions of the buffer will be allocated, - * although backing storage may not be. + * Assembles a buffer covering the specified range. The code is optimised for + * cache hits, as metadata intensive workloads will see 3 orders of magnitude + * more hits than misses. 
*/ -xfs_buf_t * +struct xfs_buf * xfs_buf_get( xfs_buftarg_t *target,/* target for buffer */ xfs_off_t ioff, /* starting offset of range */ size_t isize, /* length of range */ xfs_buf_flags_t flags) { - xfs_buf_t *bp, *new_bp; + struct xfs_buf *bp; + struct xfs_buf *new_bp; int error = 0; + bp = _xfs_buf_find(target, ioff, isize, flags, NULL); + if (likely(bp)) + goto found; + new_bp = xfs_buf_allocate(flags); if (unlikely(!new_bp)) return NULL; + _xfs_buf_initialize(new_bp, target, + ioff << BBSHIFT, isize << BBSHIFT, flags); + bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); + if (!bp) { + xfs_buf_deallocate(new_bp); + return NULL; + } + if (bp == new_bp) { error = xfs_buf_allocate_memory(bp, flags); if (error) goto no_buffer; - } else { + } else xfs_buf_deallocate(new_bp); - if (unlikely(bp == NULL)) - return NULL; - } + /* + * Now we have a workable buffer, fill in the block number so + * that we can do IO on it. + */ + bp->b_bn = ioff; + bp->b_count_desired = bp->b_buffer_length; + +found: if (!(bp->b_flags & XBF_MAPPED)) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { @@ -560,18 +576,10 @@ xfs_buf_get( } XFS_STATS_INC(xb_get); - - /* - * Always fill in the block number now, the mapped cases can do - * their own overlay of this later. - */ - bp->b_bn = ioff; - bp->b_count_desired = bp->b_buffer_length; - trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; - no_buffer: +no_buffer: if (flags & (XBF_LOCK | XBF_TRYLOCK)) xfs_buf_unlock(bp); xfs_buf_rele(bp); -- cgit v1.2.2 From b17b833443a3b65907f5ecb36f8af33996f6ec78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:43 +0000 Subject: xfs: remove xfs_get_buftarg_list The code is unused and under a config option that doesn't exist, remove it. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6785b7bd952d..e0339a4a0bc8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1835,11 +1835,3 @@ xfs_buf_terminate(void) destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); } - -#ifdef CONFIG_KDB_MODULES -struct list_head * -xfs_get_buftarg_list(void) -{ - return &xfs_buftarg_list; -} -#endif -- cgit v1.2.2 From 5fde0326ddb1472ef31034c8ed952a19d4679191 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:44 +0000 Subject: xfs: remove XFS_BUF_FINISH_IOWAIT Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e0339a4a0bc8..36fed03da26f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1100,7 +1100,7 @@ xfs_bioerror_relse( * ASYNC buffers. 
*/ xfs_buf_ioerror(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); + complete(&bp->b_iowait); } else { xfs_buf_relse(bp); } -- cgit v1.2.2 From c867cb61641751fd3d86350232d64ae2a10137d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:46 +0000 Subject: xfs: remove XFS_BUF_STALE and XFS_BUF_SUPER_STALE Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 36fed03da26f..f88eab9e8144 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1061,7 +1061,7 @@ xfs_bioerror( XFS_BUF_UNREAD(bp); xfs_buf_delwri_dequeue(bp); XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); + xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); @@ -1090,7 +1090,7 @@ xfs_bioerror_relse( XFS_BUF_UNREAD(bp); xfs_buf_delwri_dequeue(bp); XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); + xfs_buf_stale(bp); bp->b_iodone = NULL; if (!(fl & XBF_ASYNC)) { /* -- cgit v1.2.2 From af5c4bee499eb68bc36ca046030394d82d0e3669 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:47 +0000 Subject: xfs: remove buffers from the delwri list in xfs_buf_stale For each call to xfs_buf_stale we call xfs_buf_delwri_dequeue either directly before or after it, or are guaranteed by the surrounding conditionals that we are never called on delwri buffers. Simplify this situation by moving the call to xfs_buf_delwri_dequeue into xfs_buf_stale. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f88eab9e8144..3df7d0a2b245 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -151,6 +151,7 @@ xfs_buf_stale( struct xfs_buf *bp) { bp->b_flags |= XBF_STALE; + xfs_buf_delwri_dequeue(bp); atomic_set(&(bp)->b_lru_ref, 0); if (!list_empty(&bp->b_lru)) { struct xfs_buftarg *btp = bp->b_target; @@ -1059,7 +1060,6 @@ xfs_bioerror( * We're calling xfs_buf_ioend, so delete XBF_DONE flag. */ XFS_BUF_UNREAD(bp); - xfs_buf_delwri_dequeue(bp); XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); @@ -1088,7 +1088,6 @@ xfs_bioerror_relse( * change that interface. */ XFS_BUF_UNREAD(bp); - xfs_buf_delwri_dequeue(bp); XFS_BUF_DONE(bp); xfs_buf_stale(bp); bp->b_iodone = NULL; -- cgit v1.2.2 From 4347b9d7ad4223474d315c3ab6bc1ce7cce7fa2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:48 +0000 Subject: xfs: clean up buffer allocation Change _xfs_buf_initialize to allocate the buffer directly and rename it to xfs_buf_alloc now that is the only buffer allocation routine. Also remove the xfs_buf_deallocate wrapper around the kmem_zone_free calls for buffers. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 3df7d0a2b245..1f24ee5f0d7a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -65,10 +65,6 @@ struct workqueue_struct *xfsconvertd_workqueue; #define xb_to_km(flags) \ (((flags) & XBF_DONT_BLOCK) ? 
KM_NOFS : KM_SLEEP) -#define xfs_buf_allocate(flags) \ - kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) -#define xfs_buf_deallocate(bp) \ - kmem_zone_free(xfs_buf_zone, (bp)); static inline int xfs_buf_is_vmapped( @@ -167,14 +163,19 @@ xfs_buf_stale( ASSERT(atomic_read(&bp->b_hold) >= 1); } -STATIC void -_xfs_buf_initialize( - xfs_buf_t *bp, - xfs_buftarg_t *target, +struct xfs_buf * +xfs_buf_alloc( + struct xfs_buftarg *target, xfs_off_t range_base, size_t range_length, xfs_buf_flags_t flags) { + struct xfs_buf *bp; + + bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)); + if (unlikely(!bp)) + return NULL; + /* * We don't want certain flags to appear in b_flags. */ @@ -203,8 +204,9 @@ _xfs_buf_initialize( init_waitqueue_head(&bp->b_waiters); XFS_STATS_INC(xb_create); - trace_xfs_buf_init(bp, _RET_IP_); + + return bp; } /* @@ -277,7 +279,7 @@ xfs_buf_free( } else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); - xfs_buf_deallocate(bp); + kmem_zone_free(xfs_buf_zone, bp); } /* @@ -539,16 +541,14 @@ xfs_buf_get( if (likely(bp)) goto found; - new_bp = xfs_buf_allocate(flags); + new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT, + flags); if (unlikely(!new_bp)) return NULL; - _xfs_buf_initialize(new_bp, target, - ioff << BBSHIFT, isize << BBSHIFT, flags); - bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); if (!bp) { - xfs_buf_deallocate(new_bp); + kmem_zone_free(xfs_buf_zone, new_bp); return NULL; } @@ -557,7 +557,7 @@ xfs_buf_get( if (error) goto no_buffer; } else - xfs_buf_deallocate(new_bp); + kmem_zone_free(xfs_buf_zone, new_bp); /* * Now we have a workable buffer, fill in the block number so @@ -694,19 +694,6 @@ xfs_buf_read_uncached( return bp; } -xfs_buf_t * -xfs_buf_get_empty( - size_t len, - xfs_buftarg_t *target) -{ - xfs_buf_t *bp; - - bp = xfs_buf_allocate(0); - if (bp) - _xfs_buf_initialize(bp, target, 0, len, 0); - return bp; -} - /* * Return a buffer allocated as an empty buffer and associated to 
external * memory via xfs_buf_associate_memory() back to it's empty state. @@ -792,10 +779,9 @@ xfs_buf_get_uncached( int error, i; xfs_buf_t *bp; - bp = xfs_buf_allocate(0); + bp = xfs_buf_alloc(target, 0, len, 0); if (unlikely(bp == NULL)) goto fail; - _xfs_buf_initialize(bp, target, 0, len, 0); error = _xfs_buf_get_pages(bp, page_count, 0); if (error) @@ -823,7 +809,7 @@ xfs_buf_get_uncached( __free_page(bp->b_pages[i]); _xfs_buf_free_pages(bp); fail_free_buf: - xfs_buf_deallocate(bp); + kmem_zone_free(xfs_buf_zone, bp); fail: return NULL; } -- cgit v1.2.2 From 901796afca0d31d97bf6d1bf2ab251a93a4b8c83 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:49 +0000 Subject: xfs: clean up xfs_ioerror_alert Instead of passing the block number and mount structure explicitly, get them off the bp and make the argument order more natural. Also move it to xfs_buf.c and stop printing the device name given that we already get the fs name as part of xfs_alert, and we know what device it operates on because of the caller that gets printed. Finally, rename it to xfs_buf_ioerror_alert and pass __func__ as argument where it makes sense. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1f24ee5f0d7a..0a767fca0305 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1004,6 +1004,17 @@ xfs_buf_ioerror( trace_xfs_buf_ioerror(bp, error, _RET_IP_); } +void +xfs_buf_ioerror_alert( + struct xfs_buf *bp, + const char *func) +{ + xfs_alert(bp->b_target->bt_mount, +"metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd", + (__uint64_t)XFS_BUF_ADDR(bp), func, + bp->b_error, XFS_BUF_COUNT(bp)); +} + int xfs_bwrite( struct xfs_buf *bp) -- cgit v1.2.2 From 02b102df1502a7ea4167d115510e1e8fe6467f12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:51 +0000 Subject: xfs: remove xfs_buf_target_name The calling convention that returns a pointer to a static buffer is fairly nasty, so just opencode it in the only caller that is left. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 0a767fca0305..6f615c259411 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1457,9 +1457,13 @@ xfs_setsize_buftarg_flags( btp->bt_smask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { + char name[BDEVNAME_SIZE]; + + bdevname(btp->bt_bdev, name); + xfs_warn(btp->bt_mount, "Cannot set_blocksize to %u on device %s\n", - sectorsize, xfs_buf_target_name(btp)); + sectorsize, name); return EINVAL; } -- cgit v1.2.2 From 5a93a064d27b42e4af1772b0599b53e3241191ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Oct 2011 16:52:53 +0000 Subject: xfs: do not flush data workqueues in xfs_flush_buftarg When we call xfs_flush_buftarg (generally from sync or umount) it already is too late to flush the data workqueues, as I/O completion is signalled for them and we are thus already done with the data we would flush here. There are places where flushing them might be useful, but the current sync interface doesn't give us that opportunity. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_buf.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6f615c259411..cf0ac056815f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1623,13 +1623,6 @@ xfs_buf_delwri_promote( spin_unlock(&btp->bt_delwri_lock); } -STATIC void -xfs_buf_runall_queues( - struct workqueue_struct *queue) -{ - flush_workqueue(queue); -} - /* * Move as many buffers as specified to the supplied list * idicating if we skipped any buffers to prevent deadlocks. 
@@ -1752,9 +1745,7 @@ xfs_flush_buftarg( LIST_HEAD(wait_list); struct blk_plug plug; - xfs_buf_runall_queues(xfsconvertd_workqueue); - xfs_buf_runall_queues(xfsdatad_workqueue); - xfs_buf_runall_queues(xfslogd_workqueue); + flush_workqueue(xfslogd_workqueue); set_bit(XBT_FORCE_FLUSH, &target->bt_flags); pincount = xfs_buf_delwri_split(target, &tmp_list, 0); -- cgit v1.2.2