author    Dave Chinner <dchinner@redhat.com>    2013-08-27 20:18:06 -0400
committer Al Viro <viro@zeniv.linux.org.uk>     2013-09-10 18:56:31 -0400
commit    a408235726aa82c0358c9ec68124b6f4bc0a79df (patch)
tree      acd5a608f19e5717d5de6b76977f4bf09f00a01e /fs/xfs
parent    addbda40bed47d8942658fca93e14b5f1cbf009a (diff)
xfs: rework buffer dispose list tracking
In converting the buffer lru lists to use the generic code, the locking for marking the buffers as on the dispose list was lost. This confuses LRU buffer tracking and accounting, resulting in reference counts being mucked up and the filesystem being unmountable.

To fix this, introduce an internal buffer spinlock to protect the state field that holds the dispose list information. Because there is now locking needed around xfs_buf_lru_add/del, and they are used in exactly one place each, two lines apart, get rid of the wrappers and code the logic directly in place.

Further, the LRU emptying code used on unmount is less than optimal. Convert it to use a dispose list as per a normal shrinker walk, and repeat the walk that fills the dispose list until the LRU is empty. This avoids needing to drop and regain the LRU lock for every item being freed, and allows the same logic as the shrinker isolate call to be used. Simpler, easier to understand.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Rientjes <rientjes@google.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
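[Editorial note] The message describes two related patterns: a per-buffer lock protecting the dispose-list state, and an unmount path that repeatedly isolates unreferenced buffers onto a private dispose list and then frees them without holding the LRU lock. Below is a minimal, self-contained userspace sketch of that isolate-then-dispose loop. It is illustrative only, not the kernel code; the names (struct item, isolate_unreferenced, lru_ref, dispose) are invented for the example.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item	*next;
	pthread_mutex_t	lock;		/* models bp->b_lock */
	int		lru_ref;	/* models b_lru_ref */
	int		dispose;	/* models XFS_BSTATE_DISPOSE */
};

/* Move every item whose reference count has reached zero onto *dispose. */
static void isolate_unreferenced(struct item **lru, struct item **dispose)
{
	struct item **pp = lru;

	while (*pp) {
		struct item *it = *pp;

		/* the per-item lock protects lru_ref and the dispose flag */
		if (pthread_mutex_trylock(&it->lock) != 0) {
			pp = &it->next;		/* busy: skip it this pass */
			continue;
		}
		if (it->lru_ref > 0) {
			it->lru_ref--;		/* rotate: one more trip */
			pthread_mutex_unlock(&it->lock);
			pp = &it->next;
			continue;
		}
		it->dispose = 1;
		*pp = it->next;			/* unlink from the LRU */
		it->next = *dispose;		/* push onto the dispose list */
		*dispose = it;
		pthread_mutex_unlock(&it->lock);
	}
}

int main(void)
{
	struct item *lru = NULL, *dispose = NULL;
	int i;

	/* build a tiny LRU with reference counts 0, 1, 0 */
	for (i = 0; i < 3; i++) {
		struct item *it = calloc(1, sizeof(*it));

		pthread_mutex_init(&it->lock, NULL);
		it->lru_ref = i & 1;
		it->next = lru;
		lru = it;
	}

	/* repeat the walk until the LRU is empty, then drain the dispose list */
	while (lru) {
		isolate_unreferenced(&lru, &dispose);
		while (dispose) {	/* freeing needs no LRU lock held */
			struct item *it = dispose;

			dispose = it->next;
			printf("disposing item %p\n", (void *)it);
			free(it);
		}
	}
	return 0;
}

In the patch itself the same idea appears in xfs_buftarg_wait_rele()/xfs_wait_buftarg() below, with bp->b_lock taken by trylock to avoid inverting against the LRU lock, and XFS_BSTATE_DISPOSE recording that the buffer was last on the disposal list so xfs_buf_rele() can tell whether it still needs removing from the LRU.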
Diffstat (limited to 'fs/xfs')
-rw-r--r--	fs/xfs/xfs_buf.c	125
-rw-r--r--	fs/xfs/xfs_buf.h	 12
2 files changed, 79 insertions(+), 58 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 76c595a9ad48..d46f6a3dc1de 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -81,37 +81,6 @@ xfs_buf_vmap_len(
 }
 
 /*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-static void
-xfs_buf_lru_add(
-	struct xfs_buf	*bp)
-{
-	if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
-		bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
-		atomic_inc(&bp->b_hold);
-	}
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are not
- * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
- * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free().
- */
-static void
-xfs_buf_lru_del(
-	struct xfs_buf	*bp)
-{
-	list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
-}
-
-/*
  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
  * b_lru_ref count so that the buffer is freed immediately when the buffer
  * reference count falls to zero. If the buffer is already on the LRU, we need
@@ -134,12 +103,14 @@ xfs_buf_stale(
 	 */
 	bp->b_flags &= ~_XBF_DELWRI_Q;
 
-	atomic_set(&(bp)->b_lru_ref, 0);
-	if (!(bp->b_lru_flags & _XBF_LRU_DISPOSE) &&
+	spin_lock(&bp->b_lock);
+	atomic_set(&bp->b_lru_ref, 0);
+	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
 	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
 		atomic_dec(&bp->b_hold);
 
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
+	spin_unlock(&bp->b_lock);
 }
 
 static int
@@ -203,6 +174,7 @@ _xfs_buf_alloc(
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
+	spin_lock_init(&bp->b_lock);
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
 	bp->b_flags = flags;
@@ -892,12 +864,33 @@ xfs_buf_rele(
 
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-		if (!(bp->b_flags & XBF_STALE) &&
-			   atomic_read(&bp->b_lru_ref)) {
-			xfs_buf_lru_add(bp);
+		spin_lock(&bp->b_lock);
+		if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+			/*
+			 * If the buffer is added to the LRU take a new
+			 * reference to the buffer for the LRU and clear the
+			 * (now stale) dispose list state flag
+			 */
+			if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
+				bp->b_state &= ~XFS_BSTATE_DISPOSE;
+				atomic_inc(&bp->b_hold);
+			}
+			spin_unlock(&bp->b_lock);
 			spin_unlock(&pag->pag_buf_lock);
 		} else {
-			xfs_buf_lru_del(bp);
+			/*
+			 * most of the time buffers will already be removed from
+			 * the LRU, so optimise that case by checking for the
+			 * XFS_BSTATE_DISPOSE flag indicating the last list the
+			 * buffer was on was the disposal list
+			 */
+			if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
+				list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
+			} else {
+				ASSERT(list_empty(&bp->b_lru));
+			}
+			spin_unlock(&bp->b_lock);
+
 			ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 			spin_unlock(&pag->pag_buf_lock);
@@ -1485,33 +1478,48 @@ xfs_buftarg_wait_rele(
 
 {
 	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
+	struct list_head	*dispose = arg;
 
 	if (atomic_read(&bp->b_hold) > 1) {
-		/* need to wait */
+		/* need to wait, so skip it this pass */
 		trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
-		spin_unlock(lru_lock);
-		delay(100);
-	} else {
-		/*
-		 * clear the LRU reference count so the buffer doesn't get
-		 * ignored in xfs_buf_rele().
-		 */
-		atomic_set(&bp->b_lru_ref, 0);
-		spin_unlock(lru_lock);
-		xfs_buf_rele(bp);
+		return LRU_SKIP;
 	}
+	if (!spin_trylock(&bp->b_lock))
+		return LRU_SKIP;
 
-	spin_lock(lru_lock);
-	return LRU_RETRY;
+	/*
+	 * clear the LRU reference count so the buffer doesn't get
+	 * ignored in xfs_buf_rele().
+	 */
+	atomic_set(&bp->b_lru_ref, 0);
+	bp->b_state |= XFS_BSTATE_DISPOSE;
+	list_move(item, dispose);
+	spin_unlock(&bp->b_lock);
+	return LRU_REMOVED;
 }
 
 void
 xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
-	while (list_lru_count(&btp->bt_lru))
+	LIST_HEAD(dispose);
+	int loop = 0;
+
+	/* loop until there is nothing left on the lru list. */
+	while (list_lru_count(&btp->bt_lru)) {
 		list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
-			      NULL, LONG_MAX);
+			      &dispose, LONG_MAX);
+
+		while (!list_empty(&dispose)) {
+			struct xfs_buf *bp;
+			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+			list_del_init(&bp->b_lru);
+			xfs_buf_rele(bp);
+		}
+		if (loop++ != 0)
+			delay(100);
+	}
 }
 
 static enum lru_status
@@ -1524,15 +1532,24 @@ xfs_buftarg_isolate(
 	struct list_head	*dispose = arg;
 
 	/*
+	 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
+	 * If we fail to get the lock, just skip it.
+	 */
+	if (!spin_trylock(&bp->b_lock))
+		return LRU_SKIP;
+	/*
 	 * Decrement the b_lru_ref count unless the value is already
 	 * zero. If the value is already zero, we need to reclaim the
 	 * buffer, otherwise it gets another trip through the LRU.
 	 */
-	if (!atomic_add_unless(&bp->b_lru_ref, -1, 0))
+	if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+		spin_unlock(&bp->b_lock);
 		return LRU_ROTATE;
+	}
 
-	bp->b_lru_flags |= _XBF_LRU_DISPOSE;
+	bp->b_state |= XFS_BSTATE_DISPOSE;
 	list_move(item, dispose);
+	spin_unlock(&bp->b_lock);
 	return LRU_REMOVED;
 }
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5ec7d35a77ea..e65683361017 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -60,7 +60,6 @@ typedef enum {
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
 #define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
-#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -79,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_COMPOUND,	"COMPOUND" }, \
-	{ _XBF_LRU_DISPOSE,	"LRU_DISPOSE" }
+	{ _XBF_COMPOUND,	"COMPOUND" }
+
+/*
+ * Internal state flags.
+ */
+#define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
 
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
@@ -136,7 +139,8 @@ typedef struct xfs_buf {
 	 * bt_lru_lock and not by b_sema
 	 */
 	struct list_head	b_lru;		/* lru list */
-	xfs_buf_flags_t		b_lru_flags;	/* internal lru status flags */
+	spinlock_t		b_lock;		/* internal state lock */
+	unsigned int		b_state;	/* internal state flags */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */