Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
 -rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 235
 1 file changed, 158 insertions(+), 77 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e31..92f1f2acc6ab 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
-	.shrink = xfsbufd_wakeup,
-	.seeks = DEFAULT_SEEKS,
-};
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
 }
 
 /*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
  */
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	if (list_empty(&bp->b_lru))
+		return;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that the LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
 
 STATIC void
 _xfs_buf_initialize(
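
The three helpers above establish the invariant the rest of the patch relies on: membership of the per-target LRU owns exactly one b_hold reference, and xfs_buf_lru_del may test list_empty() without the lock because a stale answer is always caught by the re-check under bt_lru_lock. A toy userspace model of that pattern, for illustration only (pthread mutex and C11 atomics stand in for bt_lru_lock and the kernel atomics; an atomic flag stands in for list membership):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct buf {
        atomic_int  hold;       /* mirrors b_hold */
        atomic_bool on_lru;     /* mirrors !list_empty(&b_lru) */
    };

    static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned lru_nr;     /* mirrors bt_lru_nr */

    /* Adding to the LRU takes its own hold reference, at most once. */
    static void lru_add(struct buf *bp)
    {
        pthread_mutex_lock(&lru_lock);
        if (!atomic_load(&bp->on_lru)) {
            atomic_fetch_add(&bp->hold, 1);
            atomic_store(&bp->on_lru, true);
            lru_nr++;
        }
        pthread_mutex_unlock(&lru_lock);
    }

    /* Unlocked fast path first; the re-check under the lock makes a racy
     * reading harmless, so the only cost of losing the race is one wasted
     * lock round trip, which is the saving xfs_buf_lru_del describes. */
    static void lru_del(struct buf *bp)
    {
        if (!atomic_load(&bp->on_lru))
            return;

        pthread_mutex_lock(&lru_lock);
        if (atomic_load(&bp->on_lru)) {
            atomic_store(&bp->on_lru, false);
            lru_nr--;
        }
        pthread_mutex_unlock(&lru_lock);
    }

As in the patch, dropping the hold reference is the caller's business; lru_del only changes list membership.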
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
 
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
+	ASSERT(list_empty(&bp->b_lru));
+
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(NULL, 0, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
@@ -828,6 +897,7 @@ xfs_buf_rele(
 
 	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(list_empty(&bp->b_lru));
 		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
 	}
 
 	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
 			spin_unlock(&pag->pag_buf_lock);
 			bp->b_relse(bp);
+		} else if (!(bp->b_flags & XBF_STALE) &&
+			   atomic_read(&bp->b_lru_ref)) {
+			xfs_buf_lru_add(bp);
+			spin_unlock(&pag->pag_buf_lock);
 		} else {
+			xfs_buf_lru_del(bp);
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
 			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 			spin_unlock(&pag->pag_buf_lock);
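
The rewritten release path now has three outcomes once the last caller drops its hold: run the b_relse callback, park the buffer on the LRU (xfs_buf_lru_add takes the LRU's own reference), or tear the buffer down. It hinges on the atomic_dec_and_lock() contract: the lock is taken only when the decrement would reach zero, and the caller returns holding pag_buf_lock in exactly that case. A portable approximation of that contract, as a sketch rather than the kernel's optimised implementation:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    /* Drop one reference; return true holding 'lock' iff the count hit 0. */
    static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
    {
        int old = atomic_load(cnt);

        /* Fast path: while the count cannot reach zero, a plain
         * decrement suffices and the lock is never taken. */
        while (old > 1) {
            if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                return false;
        }

        /* Slow path: take the lock, then decrement under it. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) == 1)
            return true;    /* caller owns the lock; count is now 0 */
        pthread_mutex_unlock(lock);
        return false;
    }

This is why the LRU-add branch can safely re-use pag_buf_lock: it only runs on the final-release path, serialised against lookups.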
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
  */
 
 /*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
  */
 void
 xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
-	struct xfs_perag	*pag;
-	uint			i;
+	struct xfs_buf		*bp;
 
-	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
-		pag = xfs_perag_get(btp->bt_mount, i);
-		spin_lock(&pag->pag_buf_lock);
-		while (rb_first(&pag->pag_buf_tree)) {
-			spin_unlock(&pag->pag_buf_lock);
+restart:
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+		if (atomic_read(&bp->b_hold) > 1) {
+			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
-			spin_lock(&pag->pag_buf_lock);
+			goto restart;
 		}
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		/*
+		 * clear the LRU reference count so the buffer doesn't get
+		 * ignored in xfs_buf_rele().
+		 */
+		atomic_set(&bp->b_lru_ref, 0);
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
 	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
-/*
- * buftarg list for delwrite queue processing
- */
-static LIST_HEAD(xfs_buftarg_list);
-static DEFINE_SPINLOCK(xfs_buftarg_lock);
-
-STATIC void
-xfs_register_buftarg(
-	xfs_buftarg_t *btp)
+int
+xfs_buftarg_shrink(
+	struct shrinker		*shrink,
+	int			nr_to_scan,
+	gfp_t			mask)
 {
-	spin_lock(&xfs_buftarg_lock);
-	list_add(&btp->bt_list, &xfs_buftarg_list);
-	spin_unlock(&xfs_buftarg_lock);
-}
-
-STATIC void
-xfs_unregister_buftarg(
-	xfs_buftarg_t *btp)
-{
-	spin_lock(&xfs_buftarg_lock);
-	list_del(&btp->bt_list);
-	spin_unlock(&xfs_buftarg_lock);
+	struct xfs_buftarg	*btp = container_of(shrink,
+					struct xfs_buftarg, bt_shrinker);
+	struct xfs_buf		*bp;
+	LIST_HEAD(dispose);
+
+	if (!nr_to_scan)
+		return btp->bt_lru_nr;
+
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		if (nr_to_scan-- <= 0)
+			break;
+
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+		/*
+		 * Decrement the b_lru_ref count unless the value is already
+		 * zero. If the value is already zero, we need to reclaim the
+		 * buffer, otherwise it gets another trip through the LRU.
+		 */
+		if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+			list_move_tail(&bp->b_lru, &btp->bt_lru);
+			continue;
+		}
+
+		/*
+		 * remove the buffer from the LRU now to avoid needing another
+		 * lock round trip inside xfs_buf_rele().
+		 */
+		list_move(&bp->b_lru, &dispose);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+
+	while (!list_empty(&dispose)) {
+		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+		list_del_init(&bp->b_lru);
+		xfs_buf_rele(bp);
+	}
+
+	return btp->bt_lru_nr;
 }
 
 void
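
The scan rotates any buffer whose b_lru_ref is still positive to the tail and reclaims only buffers whose count has already reached zero, so b_lru_ref behaves as a per-buffer budget of remaining trips through the LRU. The primitive doing the work is atomic_add_unless(&bp->b_lru_ref, -1, 0): decrement unless the value is already zero, returning nonzero when the decrement happened. A portable sketch of that semantic, for illustration only:

    #include <stdatomic.h>

    /* Add 'a' to '*v' unless '*v' == u; return 1 if modified, 0 if not. */
    static int add_unless(atomic_int *v, int a, int u)
    {
        int c = atomic_load(v);

        while (c != u) {
            if (atomic_compare_exchange_weak(v, &c, c + a))
                return 1;   /* decremented; buffer earns another trip */
        }
        return 0;           /* already at 'u'; caller reclaims */
    }

xfs_buftarg_shrink() reclaims a buffer only when this returns zero, i.e. when the budget was already exhausted. Moving victims onto a private dispose list lets the final xfs_buf_rele() calls run without bt_lru_lock held.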
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
 	struct xfs_mount	*mp,
 	struct xfs_buftarg	*btp)
 {
+	unregister_shrinker(&btp->bt_shrinker);
+
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
 	iput(btp->bt_mapping->host);
 
-	/* Unregister the buftarg first so that we don't get a
-	 * wakeup finding a non-existent task
-	 */
-	xfs_unregister_buftarg(btp);
 	kthread_stop(btp->bt_task);
-
 	kmem_free(btp);
 }
 
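
Note the ordering in this hunk: the shrinker callback recovers the buftarg with container_of(), so unregister_shrinker() must complete before anything the callback touches is torn down, which is why it becomes the first thing xfs_free_buftarg() does. In outline (free_target is a hypothetical wrapper, not a function from the patch):

    /* Once unregister_shrinker() returns, no new xfs_buftarg_shrink()
     * calls can start and in-flight ones have finished, so freeing btp
     * afterwards cannot race with the callback. Reversing the order
     * would be a use-after-free. */
    void free_target(struct xfs_buftarg *btp)
    {
        unregister_shrinker(&btp->bt_shrinker); /* 1: stop reclaim entry */
        /* 2: flush, drain and release remaining buffers ... */
        kmem_free(btp);                         /* 3: memory may go away */
    }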
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
 	xfs_buftarg_t		*btp,
 	const char		*fsname)
 {
-	int	error = 0;
-
-	INIT_LIST_HEAD(&btp->bt_list);
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
 	spin_lock_init(&btp->bt_delwrite_lock);
 	btp->bt_flags = 0;
 	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task)) {
-		error = PTR_ERR(btp->bt_task);
-		goto out_error;
-	}
-	xfs_register_buftarg(btp);
-out_error:
-	return error;
+	if (IS_ERR(btp->bt_task))
+		return PTR_ERR(btp->bt_task);
+	return 0;
 }
 
 xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev =  bdev->bd_dev;
 	btp->bt_bdev = bdev;
+	INIT_LIST_HEAD(&btp->bt_lru);
+	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
 		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
+	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&btp->bt_shrinker);
 	return btp;
 
 error:
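
Each buffer target now owns its shrinker instead of sharing the global xfs_buf_shake, which is why xfs_buf_init()/xfs_buf_terminate() lose their register/unregister calls in the hunks below. The general shape of an embedded, per-object shrinker under the 2.6.37-era ->shrink() API, as a sketch (cache_scan is a hypothetical helper, and later kernels split the hook into count_objects/scan_objects):

    #include <linux/mm.h>   /* struct shrinker, register_shrinker (2.6.3x) */

    struct cache {
        struct shrinker shrinker;   /* embedded, like bt_shrinker */
        atomic_t        nr_cached;  /* stand-in for bt_lru_nr */
    };

    /* Hypothetical reclaim helper: trim up to nr objects, return what's left. */
    static int cache_scan(struct cache *c, int nr)
    {
        return atomic_read(&c->nr_cached);  /* sketch only */
    }

    static int cache_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t mask)
    {
        /* Recover the owning object from the embedded shrinker. */
        struct cache *c = container_of(shrink, struct cache, shrinker);

        if (!nr_to_scan)                    /* query: report object count */
            return atomic_read(&c->nr_cached);
        return cache_scan(c, nr_to_scan);   /* reclaim pass */
    }

    static void cache_init(struct cache *c)
    {
        c->shrinker.shrink = cache_shrink;
        c->shrinker.seeks = DEFAULT_SEEKS;
        register_shrinker(&c->shrinker);    /* one shrinker per object */
    }

    static void cache_destroy(struct cache *c)
    {
        unregister_shrinker(&c->shrinker);  /* before freeing 'c' */
    }

The per-object design lets memory pressure be applied proportionally to each target's LRU rather than through a single global wakeup.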
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
 	flush_workqueue(queue);
 }
 
-STATIC int
-xfsbufd_wakeup(
-	struct shrinker		*shrink,
-	int			priority,
-	gfp_t			mask)
-{
-	xfs_buftarg_t		*btp;
-
-	spin_lock(&xfs_buftarg_lock);
-	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
-		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
-			continue;
-		if (list_empty(&btp->bt_delwrite_queue))
-			continue;
-		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
-		wake_up_process(btp->bt_task);
-	}
-	spin_unlock(&xfs_buftarg_lock);
-	return 0;
-}
-
 /*
  * Move as many buffers as specified to the supplied list
  * indicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
 	if (!xfsconvertd_workqueue)
 		goto out_destroy_xfsdatad_workqueue;
 
-	register_shrinker(&xfs_buf_shake);
 	return 0;
 
  out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	unregister_shrinker(&xfs_buf_shake);
 	destroy_workqueue(xfsconvertd_workqueue);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);