Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 235
1 file changed, 158 insertions(+), 77 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e31..92f1f2acc6ab 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
-	.shrink = xfsbufd_wakeup,
-	.seeks = DEFAULT_SEEKS,
-};
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
 }
 
 /*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
  */
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are no
+ * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(), i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	if (list_empty(&bp->b_lru))
+		return;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that the LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
 
 STATIC void
 _xfs_buf_initialize(
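
xfs_buf_lru_del() and xfs_buf_stale() above share a double-checked pattern: an unlocked list_empty() test skips the lock round trip in the common case, and a recheck under bt_lru_lock handles a racing removal. A minimal self-contained sketch of that pattern follows; all names in it are hypothetical, not part of this patch:

#include <linux/list.h>
#include <linux/spinlock.h>

/*
 * Sketch only: mirrors the shape of xfs_buf_lru_del(). The unlocked
 * list_empty() is the fast path; the recheck under the lock decides.
 */
static void
lru_del_sketch(
	struct list_head	*entry,
	spinlock_t		*lock,
	unsigned long		*nr)
{
	if (list_empty(entry))		/* unlocked fast path */
		return;

	spin_lock(lock);
	if (!list_empty(entry)) {	/* recheck under the lock */
		list_del_init(entry);
		(*nr)--;
	}
	spin_unlock(lock);
}
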
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
 
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0);	/* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
+	ASSERT(list_empty(&bp->b_lru));
+
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(NULL, 0, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
@@ -828,6 +897,7 @@ xfs_buf_rele(
 
 	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(list_empty(&bp->b_lru));
 		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
 	}
 
 	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
 			spin_unlock(&pag->pag_buf_lock);
 			bp->b_relse(bp);
+		} else if (!(bp->b_flags & XBF_STALE) &&
+			   atomic_read(&bp->b_lru_ref)) {
+			xfs_buf_lru_add(bp);
+			spin_unlock(&pag->pag_buf_lock);
 		} else {
+			xfs_buf_lru_del(bp);
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
 			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 			spin_unlock(&pag->pag_buf_lock);
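
After this hunk, xfs_buf_rele() has three outcomes once the hold count reaches zero: run the b_relse callback, park the buffer on the LRU (when it is not stale and b_lru_ref is still positive), or unhash and free it. b_lru_ref is effectively a reclaim hint: each unit buys the buffer one more pass through the shrinker introduced later in this patch. A hedged sketch of how a caller could seed that hint; the helper is hypothetical and not added by this patch:

/*
 * Hypothetical helper: seed b_lru_ref so a hot buffer survives 'ref'
 * shrinker passes before becoming eligible for reclaim.
 */
static inline void
xfs_buf_set_lru_ref_sketch(
	struct xfs_buf	*bp,
	int		ref)
{
	atomic_set(&bp->b_lru_ref, ref);
}
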
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
  */
 
 /*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
  */
 void
 xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
-	struct xfs_perag	*pag;
-	uint			i;
+	struct xfs_buf		*bp;
 
-	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
-		pag = xfs_perag_get(btp->bt_mount, i);
-		spin_lock(&pag->pag_buf_lock);
-		while (rb_first(&pag->pag_buf_tree)) {
-			spin_unlock(&pag->pag_buf_lock);
+restart:
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+		if (atomic_read(&bp->b_hold) > 1) {
+			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
-			spin_lock(&pag->pag_buf_lock);
+			goto restart;
 		}
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		/*
+		 * Clear the LRU reference count so the buffer doesn't get
+		 * ignored in xfs_buf_rele().
+		 */
+		atomic_set(&bp->b_lru_ref, 0);
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
 	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
-/*
- * buftarg list for delwrite queue processing
- */
-static LIST_HEAD(xfs_buftarg_list);
-static DEFINE_SPINLOCK(xfs_buftarg_lock);
-
-STATIC void
-xfs_register_buftarg(
-	xfs_buftarg_t	*btp)
+int
+xfs_buftarg_shrink(
+	struct shrinker		*shrink,
+	int			nr_to_scan,
+	gfp_t			mask)
 {
-	spin_lock(&xfs_buftarg_lock);
-	list_add(&btp->bt_list, &xfs_buftarg_list);
-	spin_unlock(&xfs_buftarg_lock);
-}
-
-STATIC void
-xfs_unregister_buftarg(
-	xfs_buftarg_t	*btp)
-{
-	spin_lock(&xfs_buftarg_lock);
-	list_del(&btp->bt_list);
-	spin_unlock(&xfs_buftarg_lock);
+	struct xfs_buftarg	*btp = container_of(shrink,
+					struct xfs_buftarg, bt_shrinker);
+	struct xfs_buf		*bp;
+	LIST_HEAD(dispose);
+
+	if (!nr_to_scan)
+		return btp->bt_lru_nr;
+
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		if (nr_to_scan-- <= 0)
+			break;
+
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+		/*
+		 * Decrement the b_lru_ref count unless the value is already
+		 * zero. If the value is already zero, we need to reclaim the
+		 * buffer, otherwise it gets another trip through the LRU.
+		 */
+		if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+			list_move_tail(&bp->b_lru, &btp->bt_lru);
+			continue;
+		}
+
+		/*
+		 * Remove the buffer from the LRU now to avoid needing another
+		 * lock round trip inside xfs_buf_rele().
+		 */
+		list_move(&bp->b_lru, &dispose);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+
+	while (!list_empty(&dispose)) {
+		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+		list_del_init(&bp->b_lru);
+		xfs_buf_rele(bp);
+	}
+
+	return btp->bt_lru_nr;
 }
 
 void
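
Two details in xfs_buftarg_shrink() are worth calling out: under the old three-argument ->shrink() API a nr_to_scan of zero is a size query, and victims are isolated onto a private dispose list while bt_lru_lock is held so that xfs_buf_rele() only runs after the lock is dropped. A sketch of that contract against a toy cache; all demo_* names are illustrative, not from the patch:

#include <linux/mm.h>
#include <linux/slab.h>

struct demo_obj {
	struct list_head	lru;
};

struct demo_cache {
	struct shrinker		shrinker;
	spinlock_t		lock;
	struct list_head	lru;
	int			nr_objects;
};

static int
demo_shrink(
	struct shrinker	*shrink,
	int		nr_to_scan,
	gfp_t		mask)
{
	struct demo_cache	*cache = container_of(shrink,
					struct demo_cache, shrinker);
	struct demo_obj		*obj;
	LIST_HEAD(dispose);

	if (!nr_to_scan)			/* size query only */
		return cache->nr_objects;

	/* Phase 1: isolate victims while holding the lock. */
	spin_lock(&cache->lock);
	while (!list_empty(&cache->lru) && nr_to_scan-- > 0) {
		obj = list_first_entry(&cache->lru, struct demo_obj, lru);
		list_move(&obj->lru, &dispose);
		cache->nr_objects--;
	}
	spin_unlock(&cache->lock);

	/* Phase 2: free with the lock dropped, as xfs_buftarg_shrink() does. */
	while (!list_empty(&dispose)) {
		obj = list_first_entry(&dispose, struct demo_obj, lru);
		list_del(&obj->lru);
		kfree(obj);
	}
	return cache->nr_objects;
}
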
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
 	struct xfs_mount	*mp,
 	struct xfs_buftarg	*btp)
 {
+	unregister_shrinker(&btp->bt_shrinker);
+
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
 	iput(btp->bt_mapping->host);
 
-	/* Unregister the buftarg first so that we don't get a
-	 * wakeup finding a non-existent task
-	 */
-	xfs_unregister_buftarg(btp);
 	kthread_stop(btp->bt_task);
-
 	kmem_free(btp);
 }
 
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
 	xfs_buftarg_t		*btp,
 	const char		*fsname)
 {
-	int	error = 0;
-
-	INIT_LIST_HEAD(&btp->bt_list);
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
 	spin_lock_init(&btp->bt_delwrite_lock);
 	btp->bt_flags = 0;
 	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task)) {
-		error = PTR_ERR(btp->bt_task);
-		goto out_error;
-	}
-	xfs_register_buftarg(btp);
-out_error:
-	return error;
+	if (IS_ERR(btp->bt_task))
+		return PTR_ERR(btp->bt_task);
+	return 0;
 }
 
 xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev = bdev->bd_dev;
 	btp->bt_bdev = bdev;
+	INIT_LIST_HEAD(&btp->bt_lru);
+	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
 		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
+	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&btp->bt_shrinker);
 	return btp;
 
 error:
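
Setup and teardown are deliberately mirrored: register_shrinker() is the last step of xfs_alloc_buftarg(), once the LRU list, lock, and delwri queue exist, while xfs_free_buftarg() above calls unregister_shrinker() first, so the callback never sees a half-built or half-destroyed buftarg. Continuing the illustrative demo_cache sketch from above:

/*
 * Illustrative only: register the shrinker last during setup and
 * unregister it first during teardown, mirroring the buftarg code.
 */
static void
demo_cache_init(struct demo_cache *cache)
{
	INIT_LIST_HEAD(&cache->lru);
	spin_lock_init(&cache->lock);
	cache->nr_objects = 0;
	cache->shrinker.shrink = demo_shrink;
	cache->shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&cache->shrinker);	/* last setup step */
}

static void
demo_cache_destroy(struct demo_cache *cache)
{
	unregister_shrinker(&cache->shrinker);	/* first teardown step */
	demo_shrink(&cache->shrinker, INT_MAX, GFP_KERNEL);	/* drain */
}
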
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
 	flush_workqueue(queue);
 }
 
-STATIC int
-xfsbufd_wakeup(
-	struct shrinker		*shrink,
-	int			priority,
-	gfp_t			mask)
-{
-	xfs_buftarg_t		*btp;
-
-	spin_lock(&xfs_buftarg_lock);
-	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
-		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
-			continue;
-		if (list_empty(&btp->bt_delwrite_queue))
-			continue;
-		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
-		wake_up_process(btp->bt_task);
-	}
-	spin_unlock(&xfs_buftarg_lock);
-	return 0;
-}
-
 /*
  * Move as many buffers as specified to the supplied list
  * indicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
 	if (!xfsconvertd_workqueue)
 		goto out_destroy_xfsdatad_workqueue;
 
-	register_shrinker(&xfs_buf_shake);
 	return 0;
 
  out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	unregister_shrinker(&xfs_buf_shake);
 	destroy_workqueue(xfsconvertd_workqueue);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);