aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_buf.h
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-01-07 08:02:23 -0500
committerAlex Elder <aelder@sgi.com>2011-01-11 21:28:42 -0500
commitbfc60177f8ab509bc225becbb58f7e53a0e33e81 (patch)
treed5475852558efd0e06c698f4d64926581134b0fb /fs/xfs/linux-2.6/xfs_buf.h
parenta46db60834883c1c8c665d7fcc7b4ab66f5966fc (diff)
xfs: fix error handling for synchronous writes
If we get an IO error on a synchronous superblock write, we attach an error release function to it so that when the last reference goes away the release function is called and the buffer is invalidated and unlocked. The buffer is left locked until the release function is called so that other concurrent users of the buffer will be locked out until the buffer error is fully processed. Unfortunately, for the superblock buffer the filesystem itself holds a reference to the buffer which prevents the reference count from dropping to zero and the release function being called. As a result, once an IO error occurs on a sync write, the buffer will never be unlocked and all future attempts to lock the buffer will hang. To make matters worse, this problem is not unique to such buffers; if there is a concurrent _xfs_buf_find() running, the lookup will grab a reference to the buffer and then wait on the buffer lock, preventing the reference count from ever falling to zero and hence unlocking the buffer. As such, the whole b_relse function implementation is broken because it cannot rely on the buffer reference count falling to zero to unlock the errored buffer. The synchronous write error path is the only path that uses this callback - it is used to ensure that the synchronous waiter gets the buffer error before the error state is cleared from the buffer by the release function. Given that the only synchronous buffer writes now go through xfs_bwrite and the error path in question can only occur for a write of a dirty, logged buffer, we can move most of the b_relse processing to happen inline in xfs_buf_iodone_callbacks, just like a normal I/O completion. In addition to that we make sure the error is not cleared in xfs_buf_iodone_callbacks, so that xfs_bwrite can reliably check it. 
Given that xfs_bwrite keeps the buffer locked until it has waited for it and checked the error, this allows us to reliably propagate the error to the caller, and to make sure that the buffer is reliably unlocked. Given that xfs_buf_iodone_callbacks was the only instance of the b_relse callback we can remove it entirely. Based on earlier patches by Dave Chinner and Ajeet Yadav. Signed-off-by: Christoph Hellwig <hch@lst.de> Reported-by: Ajeet Yadav <ajeet.yadav.77@gmail.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.h')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h7
1 files changed, 1 insertions, 6 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a76c2428faf..cbe65950e52 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -152,8 +152,6 @@ typedef struct xfs_buftarg {
152 152
153struct xfs_buf; 153struct xfs_buf;
154typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); 154typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
155typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
156typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
157 155
158#define XB_PAGES 2 156#define XB_PAGES 2
159 157
@@ -183,7 +181,6 @@ typedef struct xfs_buf {
183 void *b_addr; /* virtual address of buffer */ 181 void *b_addr; /* virtual address of buffer */
184 struct work_struct b_iodone_work; 182 struct work_struct b_iodone_work;
185 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 183 xfs_buf_iodone_t b_iodone; /* I/O completion function */
186 xfs_buf_relse_t b_relse; /* releasing function */
187 struct completion b_iowait; /* queue for I/O waiters */ 184 struct completion b_iowait; /* queue for I/O waiters */
188 void *b_fspriv; 185 void *b_fspriv;
189 void *b_fspriv2; 186 void *b_fspriv2;
@@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
323#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) 320#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
324#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) 321#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
325#define XFS_BUF_SET_START(bp) do { } while (0) 322#define XFS_BUF_SET_START(bp) do { } while (0)
326#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
327 323
328#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) 324#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
329#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) 325#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
@@ -360,8 +356,7 @@ xfs_buf_set_ref(
360 356
361static inline void xfs_buf_relse(xfs_buf_t *bp) 357static inline void xfs_buf_relse(xfs_buf_t *bp)
362{ 358{
363 if (!bp->b_relse) 359 xfs_buf_unlock(bp);
364 xfs_buf_unlock(bp);
365 xfs_buf_rele(bp); 360 xfs_buf_rele(bp);
366} 361}
367 362