diff options
author | Carlos Maiolino <cmaiolino@redhat.com> | 2012-08-10 14:01:51 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-08-24 14:46:10 -0400 |
commit | e599b3253c5e49f7a2a579eef2fc2aa066989ef5 (patch) | |
tree | b54f738a2ab484b0d86a78e6cf8edf7cec4c9530 /fs | |
parent | 643bfc061c47e9c7661324a09fb0a0bc6601e5d6 (diff) |
xfs: fix race while discarding buffers [V4]
While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with
buffers from lru list), there is a possibility to have xfs_buf_stale() racing
with it, and removing buffers from dispose list before xfs_buftarg_shrink() does
it.
This happens because xfs_buftarg_shrink() handle the dispose list without
locking and the test condition in xfs_buf_stale() checks for the buffer being in
*any* list:
if (!list_empty(&bp->b_lru))
If the buffer happens to be on dispose list, this causes the buffer counter of
lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink()
and another in xfs_buf_stale()) causing a wrong account usage of the lru list.
This may cause xfs_buftarg_shrink() to return a wrong value to the memory
shrinker shrink_slab(), and such account error may also cause an underflowed
value to be returned; since the counter is lower than the current number of
items in the lru list, a decrement may happen when the counter is 0, causing
an underflow on the counter.
The fix uses a new flag field (and a new buffer flag) to serialize buffer
handling during the shrink process. The new flag field has been designed to use
btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism.
dchinner, sandeen, aquini and aris also deserve credits for this.
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/xfs_buf.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 41 |
2 files changed, 28 insertions, 18 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1e..933b7930b863 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -96,6 +96,7 @@ xfs_buf_lru_add( | |||
96 | atomic_inc(&bp->b_hold); | 96 | atomic_inc(&bp->b_hold); |
97 | list_add_tail(&bp->b_lru, &btp->bt_lru); | 97 | list_add_tail(&bp->b_lru, &btp->bt_lru); |
98 | btp->bt_lru_nr++; | 98 | btp->bt_lru_nr++; |
99 | bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; | ||
99 | } | 100 | } |
100 | spin_unlock(&btp->bt_lru_lock); | 101 | spin_unlock(&btp->bt_lru_lock); |
101 | } | 102 | } |
@@ -154,7 +155,8 @@ xfs_buf_stale( | |||
154 | struct xfs_buftarg *btp = bp->b_target; | 155 | struct xfs_buftarg *btp = bp->b_target; |
155 | 156 | ||
156 | spin_lock(&btp->bt_lru_lock); | 157 | spin_lock(&btp->bt_lru_lock); |
157 | if (!list_empty(&bp->b_lru)) { | 158 | if (!list_empty(&bp->b_lru) && |
159 | !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { | ||
158 | list_del_init(&bp->b_lru); | 160 | list_del_init(&bp->b_lru); |
159 | btp->bt_lru_nr--; | 161 | btp->bt_lru_nr--; |
160 | atomic_dec(&bp->b_hold); | 162 | atomic_dec(&bp->b_hold); |
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( | |||
1501 | */ | 1503 | */ |
1502 | list_move(&bp->b_lru, &dispose); | 1504 | list_move(&bp->b_lru, &dispose); |
1503 | btp->bt_lru_nr--; | 1505 | btp->bt_lru_nr--; |
1506 | bp->b_lru_flags |= _XBF_LRU_DISPOSE; | ||
1504 | } | 1507 | } |
1505 | spin_unlock(&btp->bt_lru_lock); | 1508 | spin_unlock(&btp->bt_lru_lock); |
1506 | 1509 | ||
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604e..7c0b6a0a1557 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -38,27 +38,28 @@ typedef enum { | |||
38 | XBRW_ZERO = 3, /* Zero target memory */ | 38 | XBRW_ZERO = 3, /* Zero target memory */ |
39 | } xfs_buf_rw_t; | 39 | } xfs_buf_rw_t; |
40 | 40 | ||
41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ | 41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ |
42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ | 42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ |
43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ | 43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ |
44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ | 46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
47 | 47 | ||
48 | /* I/O hints for the BIO layer */ | 48 | /* I/O hints for the BIO layer */ |
49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
50 | #define XBF_FUA (1 << 11)/* force cache write through mode */ | 50 | #define XBF_FUA (1 << 11)/* force cache write through mode */ |
51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ | 51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ |
52 | 52 | ||
53 | /* flags used only as arguments to access routines */ | 53 | /* flags used only as arguments to access routines */ |
54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ | 54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ |
55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ | 55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ |
56 | 56 | ||
57 | /* flags used only internally */ | 57 | /* flags used only internally */ |
58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ | 58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ |
59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ | 60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ | 61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ |
62 | #define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ | ||
62 | 63 | ||
63 | typedef unsigned int xfs_buf_flags_t; | 64 | typedef unsigned int xfs_buf_flags_t; |
64 | 65 | ||
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; | |||
72 | { XBF_SYNCIO, "SYNCIO" }, \ | 73 | { XBF_SYNCIO, "SYNCIO" }, \ |
73 | { XBF_FUA, "FUA" }, \ | 74 | { XBF_FUA, "FUA" }, \ |
74 | { XBF_FLUSH, "FLUSH" }, \ | 75 | { XBF_FLUSH, "FLUSH" }, \ |
75 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ | 76 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ |
76 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ | 77 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ |
77 | { _XBF_PAGES, "PAGES" }, \ | 78 | { _XBF_PAGES, "PAGES" }, \ |
78 | { _XBF_KMEM, "KMEM" }, \ | 79 | { _XBF_KMEM, "KMEM" }, \ |
79 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
80 | { _XBF_COMPOUND, "COMPOUND" } | 81 | { _XBF_COMPOUND, "COMPOUND" }, \ |
82 | { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } | ||
81 | 83 | ||
82 | typedef struct xfs_buftarg { | 84 | typedef struct xfs_buftarg { |
83 | dev_t bt_dev; | 85 | dev_t bt_dev; |
@@ -124,7 +126,12 @@ typedef struct xfs_buf { | |||
124 | xfs_buf_flags_t b_flags; /* status flags */ | 126 | xfs_buf_flags_t b_flags; /* status flags */ |
125 | struct semaphore b_sema; /* semaphore for lockables */ | 127 | struct semaphore b_sema; /* semaphore for lockables */ |
126 | 128 | ||
129 | /* | ||
130 | * concurrent access to b_lru and b_lru_flags are protected by | ||
131 | * bt_lru_lock and not by b_sema | ||
132 | */ | ||
127 | struct list_head b_lru; /* lru list */ | 133 | struct list_head b_lru; /* lru list */ |
134 | xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ | ||
128 | wait_queue_head_t b_waiters; /* unpin waiters */ | 135 | wait_queue_head_t b_waiters; /* unpin waiters */ |
129 | struct list_head b_list; | 136 | struct list_head b_list; |
130 | struct xfs_perag *b_pag; /* contains rbtree root */ | 137 | struct xfs_perag *b_pag; /* contains rbtree root */ |