author		Dave Chinner <dchinner@redhat.com>	2010-12-02 00:30:55 -0500
committer	Dave Chinner <david@fromorbit.com>	2010-12-02 00:30:55 -0500
commit		430cbeb86fdcbbdabea7d4aa65307de8de425350 (patch)
tree		5d9e6851942ca74e0e13f9195b9c3f81c3bd7509
parent		ff57ab21995a8636cfc72efeebb09cc6034d756f (diff)
xfs: add a lru to the XFS buffer cache
Introduce a per-buftarg LRU for memory reclaim to operate on. This is the last piece we need to put in place so that we can fully control the buffer lifecycle. This allows XFS to be responsible for maintaining the working set of buffers under memory pressure instead of relying on VM reclaim not to take pages we need out from underneath us.

The implementation introduces a b_lru_ref counter into the buffer. This is currently set to 1 whenever the buffer is referenced and so is used to determine if the buffer should be added to the LRU or not when freed. Effectively it allows lazy LRU initialisation of the buffer so we do not need to touch the LRU list and locks in xfs_buf_find().

Instead, when the buffer is being released and we drop the last reference to it, we check the b_lru_ref count and if it is non-zero we re-add the buffer reference and add the buffer to the LRU.

The b_lru_ref counter is decremented by the shrinker, and whenever the shrinker comes across a buffer with a zero b_lru_ref counter, it releases the LRU reference on the buffer. In the absence of a lookup race, this will result in the buffer being freed.

This counting mechanism is used instead of a reference flag so that it is simple to re-introduce buffer-type specific reclaim reference counts to prioritise reclaim more effectively. We still have all those hooks in the XFS code, so this will provide the infrastructure to re-implement that functionality.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	164
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.h	8
2 files changed, 150 insertions(+), 22 deletions(-)
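Before the diff itself, a minimal user-space sketch of the lazy LRU reference pattern the commit message describes may help: a buffer parks itself on the LRU only when its last active reference is dropped, the LRU takes its own hold on it, and each shrinker pass decrements the reclaim count, freeing the buffer once that count reaches zero. Everything in the sketch is a simplified assumption for illustration only (single-threaded, plain ints instead of atomics and list heads; the names struct buf, buf_release and shrink_scan are invented), not the kernel implementation shown in the diff below.

/*
 * Minimal user-space sketch of the lazy-LRU reference pattern described in
 * the commit message. Hypothetical, simplified code for illustration only:
 * no locking, plain ints instead of atomics, invented names.
 */
#include <stdio.h>
#include <stdlib.h>

struct buf {
	int		hold;		/* active reference count (cf. b_hold) */
	int		lru_ref;	/* reclaim reference count (cf. b_lru_ref) */
	int		on_lru;		/* stands in for list membership */
	struct buf	*next;		/* LRU list link */
};

static struct buf *lru_head;		/* per-"buftarg" LRU list */

/*
 * Dropping the last active reference parks the buffer on the LRU instead of
 * freeing it, and the LRU takes its own hold so the buffer stays alive.
 */
static void buf_release(struct buf *bp)
{
	if (--bp->hold > 0)
		return;
	if (bp->lru_ref > 0 && !bp->on_lru) {
		bp->hold++;		/* this reference is now owned by the LRU */
		bp->on_lru = 1;
		bp->next = lru_head;
		lru_head = bp;
		return;
	}
	free(bp);			/* no reclaim references left: free it */
}

/*
 * The shrinker decrements lru_ref on each pass; only when it reaches zero is
 * the LRU's hold dropped, which (absent other holds) frees the buffer.
 */
static void shrink_scan(void)
{
	struct buf **pp = &lru_head;

	while (*pp) {
		struct buf *bp = *pp;

		if (--bp->lru_ref > 0) {	/* not cold enough yet */
			pp = &bp->next;
			continue;
		}
		*pp = bp->next;			/* unlink from the LRU */
		bp->on_lru = 0;
		buf_release(bp);		/* drop the LRU's hold */
	}
}

int main(void)
{
	struct buf *bp = calloc(1, sizeof(*bp));

	bp->hold = 1;
	bp->lru_ref = 2;	/* e.g. a buffer type that deserves two passes */
	buf_release(bp);	/* last user reference: buffer parks on the LRU */
	shrink_scan();		/* first pass: lru_ref 2 -> 1, buffer survives */
	printf("on LRU after one pass: %d\n", lru_head != NULL);
	shrink_scan();		/* second pass: lru_ref hits 0, buffer is freed */
	printf("on LRU after two passes: %d\n", lru_head != NULL);
	return 0;
}

Compiled and run, the sketch reports the buffer still on the LRU after one shrinker pass and gone after the second, which is how b_lru_ref is meant to let specific buffer types earn extra trips around the LRU before being reclaimed.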
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0a00d7a2fc23..92f1f2acc6ab 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -163,8 +163,79 @@ test_page_region(
 }
 
 /*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
  */
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are no
+ * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is
+ * there to optimise the shrinker removing the buffer from the LRU and
+ * calling xfs_buf_free(), i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	if (list_empty(&bp->b_lru))
+		return;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that the LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
 
 STATIC void
 _xfs_buf_initialize(
@@ -181,7 +252,9 @@ _xfs_buf_initialize(
 
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0);	/* held, no waiters */
@@ -257,6 +330,8 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
+	ASSERT(list_empty(&bp->b_lru));
+
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -822,6 +897,7 @@ xfs_buf_rele(
 
 	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(list_empty(&bp->b_lru));
 		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
@@ -829,13 +905,19 @@ xfs_buf_rele(
 	}
 
 	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
 			spin_unlock(&pag->pag_buf_lock);
 			bp->b_relse(bp);
+		} else if (!(bp->b_flags & XBF_STALE) &&
+			   atomic_read(&bp->b_lru_ref)) {
+			xfs_buf_lru_add(bp);
+			spin_unlock(&pag->pag_buf_lock);
 		} else {
+			xfs_buf_lru_del(bp);
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
 			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 			spin_unlock(&pag->pag_buf_lock);
@@ -1432,27 +1514,35 @@ xfs_buf_iomove(
  */
 
 /*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
  */
 void
 xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
-	struct xfs_perag	*pag;
-	uint			i;
+	struct xfs_buf		*bp;
 
-	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
-		pag = xfs_perag_get(btp->bt_mount, i);
-		spin_lock(&pag->pag_buf_lock);
-		while (rb_first(&pag->pag_buf_tree)) {
-			spin_unlock(&pag->pag_buf_lock);
+restart:
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+		if (atomic_read(&bp->b_hold) > 1) {
+			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
-			spin_lock(&pag->pag_buf_lock);
+			goto restart;
 		}
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		/*
+		 * Clear the LRU reference count so the buffer doesn't get
+		 * ignored in xfs_buf_rele().
+		 */
+		atomic_set(&bp->b_lru_ref, 0);
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
 	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
 int
@@ -1463,15 +1553,45 @@ xfs_buftarg_shrink(
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
-	if (nr_to_scan) {
-		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
-			return -1;
-		if (list_empty(&btp->bt_delwrite_queue))
-			return -1;
-		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
-		wake_up_process(btp->bt_task);
+	struct xfs_buf		*bp;
+	LIST_HEAD(dispose);
+
+	if (!nr_to_scan)
+		return btp->bt_lru_nr;
+
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		if (nr_to_scan-- <= 0)
+			break;
+
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+		/*
+		 * Decrement the b_lru_ref count unless the value is already
+		 * zero. If the value is already zero, we need to reclaim the
+		 * buffer, otherwise it gets another trip through the LRU.
+		 */
+		if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+			list_move_tail(&bp->b_lru, &btp->bt_lru);
+			continue;
+		}
+
+		/*
+		 * remove the buffer from the LRU now to avoid needing another
+		 * lock round trip inside xfs_buf_rele().
+		 */
+		list_move(&bp->b_lru, &dispose);
+		btp->bt_lru_nr--;
 	}
-	return list_empty(&btp->bt_delwrite_queue) ? -1 : 1;
+	spin_unlock(&btp->bt_lru_lock);
+
+	while (!list_empty(&dispose)) {
+		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+		list_del_init(&bp->b_lru);
+		xfs_buf_rele(bp);
+	}
+
+	return btp->bt_lru_nr;
 }
 
 void
@@ -1606,6 +1726,8 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev = bdev->bd_dev;
 	btp->bt_bdev = bdev;
+	INIT_LIST_HEAD(&btp->bt_lru);
+	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9344103e77d6..4601eabd0da0 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -134,6 +134,9 @@ typedef struct xfs_buftarg {
 
 	/* LRU control structures */
 	struct shrinker		bt_shrinker;
+	struct list_head	bt_lru;
+	spinlock_t		bt_lru_lock;
+	unsigned int		bt_lru_nr;
 } xfs_buftarg_t;
 
 /*
@@ -166,9 +169,11 @@ typedef struct xfs_buf {
 	xfs_off_t		b_file_offset;	/* offset in file */
 	size_t			b_buffer_length;/* size of buffer in bytes */
 	atomic_t		b_hold;		/* reference count */
+	atomic_t		b_lru_ref;	/* lru reclaim ref count */
 	xfs_buf_flags_t		b_flags;	/* status flags */
 	struct semaphore	b_sema;		/* semaphore for lockables */
 
+	struct list_head	b_lru;		/* lru list */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
@@ -266,7 +271,8 @@ extern void xfs_buf_terminate(void);
 #define XFS_BUF_ZEROFLAGS(bp)	((bp)->b_flags &= \
 			~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
 
-#define XFS_BUF_STALE(bp)	((bp)->b_flags |= XBF_STALE)
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp)	xfs_buf_stale(bp);
 #define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
 #define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)
 #define XFS_BUF_SUPER_STALE(bp)	do { \