 fs/xfs/xfs_buf.c          | 341
 fs/xfs/xfs_buf.h          |  28
 fs/xfs/xfs_buf_item.c     |  96
 fs/xfs/xfs_dquot.c        |  33
 fs/xfs/xfs_dquot.h        |   1
 fs/xfs/xfs_dquot_item.c   | 161
 fs/xfs/xfs_extfree_item.c |  55
 fs/xfs/xfs_inode.c        |  25
 fs/xfs/xfs_inode.h        |   1
 fs/xfs/xfs_inode_item.c   | 152
 fs/xfs/xfs_log_recover.c  |  46
 fs/xfs/xfs_qm.c           | 148
 fs/xfs/xfs_super.c        |  16
 fs/xfs/xfs_sync.c         |  18
 fs/xfs/xfs_trace.h        |   7
 fs/xfs/xfs_trans.h        |  18
 fs/xfs/xfs_trans_ail.c    | 129
 fs/xfs/xfs_trans_buf.c    |  84
 fs/xfs/xfs_trans_priv.h   |   1
 19 files changed, 442 insertions(+), 918 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6819b5163e33..b82fc5c67fed 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -42,7 +42,6 @@
 #include "xfs_trace.h"
 
 static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
 
 static struct workqueue_struct *xfslogd_workqueue;
 
@@ -144,8 +143,17 @@ void
 xfs_buf_stale(
 	struct xfs_buf	*bp)
 {
+	ASSERT(xfs_buf_islocked(bp));
+
 	bp->b_flags |= XBF_STALE;
-	xfs_buf_delwri_dequeue(bp);
+
+	/*
+	 * Clear the delwri status so that a delwri queue walker will not
+	 * flush this buffer to disk now that it is stale. The delwri queue has
+	 * a reference to the buffer, so this is safe to do.
+	 */
+	bp->b_flags &= ~_XBF_DELWRI_Q;
+
 	atomic_set(&(bp)->b_lru_ref, 0);
 	if (!list_empty(&bp->b_lru)) {
 		struct xfs_buftarg *btp = bp->b_target;
@@ -592,10 +600,10 @@ _xfs_buf_read(
 {
 	int			status;
 
-	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+	ASSERT(!(flags & XBF_WRITE));
 	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
 
-	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
 	status = xfs_buf_iorequest(bp);
@@ -855,7 +863,7 @@ xfs_buf_rele(
 		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		xfs_buf_lru_del(bp);
-		ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 		spin_unlock(&pag->pag_buf_lock);
 		xfs_perag_put(pag);
@@ -915,13 +923,6 @@ xfs_buf_lock(
 	trace_xfs_buf_lock_done(bp, _RET_IP_);
 }
 
-/*
- * Releases the lock on the buffer object.
- * If the buffer is marked delwri but is not queued, do so before we
- * unlock the buffer as we need to set flags correctly. We also need to
- * take a reference for the delwri queue because the unlocker is going to
- * drop their's and they don't know we just queued it.
- */
 void
 xfs_buf_unlock(
 	struct xfs_buf		*bp)
@@ -1019,10 +1020,11 @@ xfs_bwrite(
 {
 	int			error;
 
+	ASSERT(xfs_buf_islocked(bp));
+
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
 
-	xfs_buf_delwri_dequeue(bp);
 	xfs_bdstrat_cb(bp);
 
 	error = xfs_buf_iowait(bp);
@@ -1254,7 +1256,7 @@ xfs_buf_iorequest(
 {
 	trace_xfs_buf_iorequest(bp, _RET_IP_);
 
-	ASSERT(!(bp->b_flags & XBF_DELWRI));
+	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
@@ -1435,11 +1437,9 @@ xfs_free_buftarg(
 {
 	unregister_shrinker(&btp->bt_shrinker);
 
-	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
 
-	kthread_stop(btp->bt_task);
 	kmem_free(btp);
 }
 
@@ -1491,20 +1491,6 @@ xfs_setsize_buftarg(
 	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
 }
 
-STATIC int
-xfs_alloc_delwri_queue(
-	xfs_buftarg_t	*btp,
-	const char	*fsname)
-{
-	INIT_LIST_HEAD(&btp->bt_delwri_queue);
-	spin_lock_init(&btp->bt_delwri_lock);
-	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task))
-		return PTR_ERR(btp->bt_task);
-	return 0;
-}
-
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
@@ -1527,8 +1513,6 @@ xfs_alloc_buftarg(
 	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
-	if (xfs_alloc_delwri_queue(btp, fsname))
-		goto error;
 	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
 	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
 	register_shrinker(&btp->bt_shrinker);
@@ -1539,125 +1523,52 @@ error:
 	return NULL;
 }
 
-
 /*
- * Delayed write buffer handling
+ * Add a buffer to the delayed write list.
+ *
+ * This queues a buffer for writeout if it hasn't already been. Note that
+ * neither this routine nor the buffer list submission functions perform
+ * any internal synchronization. It is expected that the lists are thread-local
+ * to the callers.
+ *
+ * Returns true if we queued up the buffer, or false if it already had
+ * been on the buffer list.
  */
-void
+bool
 xfs_buf_delwri_queue(
-	xfs_buf_t	*bp)
+	struct xfs_buf		*bp,
+	struct list_head	*list)
 {
-	struct xfs_buftarg *btp = bp->b_target;
-
-	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
+	ASSERT(xfs_buf_islocked(bp));
 	ASSERT(!(bp->b_flags & XBF_READ));
 
-	spin_lock(&btp->bt_delwri_lock);
-	if (!list_empty(&bp->b_list)) {
-		/* if already in the queue, move it to the tail */
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		list_move_tail(&bp->b_list, &btp->bt_delwri_queue);
-	} else {
-		/* start xfsbufd as it is about to have something to do */
-		if (list_empty(&btp->bt_delwri_queue))
-			wake_up_process(bp->b_target->bt_task);
-
-		atomic_inc(&bp->b_hold);
-		bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC;
-		list_add_tail(&bp->b_list, &btp->bt_delwri_queue);
-	}
-	bp->b_queuetime = jiffies;
-	spin_unlock(&btp->bt_delwri_lock);
-}
-
-void
-xfs_buf_delwri_dequeue(
-	xfs_buf_t	*bp)
-{
-	int		dequeued = 0;
-
-	spin_lock(&bp->b_target->bt_delwri_lock);
-	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		list_del_init(&bp->b_list);
-		dequeued = 1;
+	/*
+	 * If the buffer is already marked delwri it already is queued up
+	 * by someone else for immediate writeout. Just ignore it in that
+	 * case.
+	 */
+	if (bp->b_flags & _XBF_DELWRI_Q) {
+		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
+		return false;
 	}
-	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-	spin_unlock(&bp->b_target->bt_delwri_lock);
-
-	if (dequeued)
-		xfs_buf_rele(bp);
-
-	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
 
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time the
- * xfsbufd sees it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-	struct xfs_buf	*bp)
-{
-	struct xfs_buftarg *btp = bp->b_target;
-	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
-	ASSERT(bp->b_flags & XBF_DELWRI);
-	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
 
 	/*
-	 * Check the buffer age before locking the delayed write queue as we
-	 * don't need to promote buffers that are already past the flush age.
+	 * If a buffer gets written out synchronously or marked stale while it
+	 * is on a delwri list we lazily remove it. To do this, the other party
+	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
+	 * It remains referenced and on the list. In a rare corner case it
+	 * might get readded to a delwri list after the synchronous writeout, in
+	 * which case we just need to re-add the flag here.
 	 */
-	if (bp->b_queuetime < jiffies - age)
-		return;
-	bp->b_queuetime = jiffies - age;
-	spin_lock(&btp->bt_delwri_lock);
-	list_move(&bp->b_list, &btp->bt_delwri_queue);
-	spin_unlock(&btp->bt_delwri_lock);
-}
-
-/*
- * Move as many buffers as specified to the supplied list
- * idicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-	xfs_buftarg_t	*target,
-	struct list_head *list,
-	unsigned long	age)
-{
-	xfs_buf_t	*bp, *n;
-	int		skipped = 0;
-	int		force;
-
-	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	INIT_LIST_HEAD(list);
-	spin_lock(&target->bt_delwri_lock);
-	list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) {
-		ASSERT(bp->b_flags & XBF_DELWRI);
-
-		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
-			if (!force &&
-			    time_before(jiffies, bp->b_queuetime + age)) {
-				xfs_buf_unlock(bp);
-				break;
-			}
-
-			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-			bp->b_flags |= XBF_WRITE;
-			list_move_tail(&bp->b_list, list);
-			trace_xfs_buf_delwri_split(bp, _RET_IP_);
-		} else
-			skipped++;
+	bp->b_flags |= _XBF_DELWRI_Q;
+	if (list_empty(&bp->b_list)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_list, list);
 	}
 
-	spin_unlock(&target->bt_delwri_lock);
-	return skipped;
+	return true;
 }
 
 /*
@@ -1683,99 +1594,109 @@ xfs_buf_cmp(
 	return 0;
 }
 
-STATIC int
-xfsbufd(
-	void		*data)
+static int
+__xfs_buf_delwri_submit(
+	struct list_head	*buffer_list,
+	struct list_head	*io_list,
+	bool			wait)
 {
-	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
-
-	current->flags |= PF_MEMALLOC;
+	struct blk_plug		plug;
+	struct xfs_buf		*bp, *n;
+	int			pinned = 0;
 
-	set_freezable();
+	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
+		if (!wait) {
+			if (xfs_buf_ispinned(bp)) {
+				pinned++;
+				continue;
+			}
+			if (!xfs_buf_trylock(bp))
+				continue;
+		} else {
+			xfs_buf_lock(bp);
+		}
 
-	do {
-		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		struct list_head tmp;
-		struct blk_plug plug;
+		/*
+		 * Someone else might have written the buffer synchronously or
+		 * marked it stale in the meantime. In that case only the
+		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
+		 * reference and remove it from the list here.
+		 */
+		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
+			list_del_init(&bp->b_list);
+			xfs_buf_relse(bp);
+			continue;
+		}
 
-		if (unlikely(freezing(current)))
-			try_to_freeze();
+		list_move_tail(&bp->b_list, io_list);
+		trace_xfs_buf_delwri_split(bp, _RET_IP_);
+	}
 
-		/* sleep for a long time if there is nothing to do. */
-		if (list_empty(&target->bt_delwri_queue))
-			tout = MAX_SCHEDULE_TIMEOUT;
-		schedule_timeout_interruptible(tout);
+	list_sort(NULL, io_list, xfs_buf_cmp);
 
-		xfs_buf_delwri_split(target, &tmp, age);
-		list_sort(NULL, &tmp, xfs_buf_cmp);
+	blk_start_plug(&plug);
+	list_for_each_entry_safe(bp, n, io_list, b_list) {
+		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+		bp->b_flags |= XBF_WRITE;
 
-		blk_start_plug(&plug);
-		while (!list_empty(&tmp)) {
-			struct xfs_buf *bp;
-			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+		if (!wait) {
+			bp->b_flags |= XBF_ASYNC;
 			list_del_init(&bp->b_list);
-			xfs_bdstrat_cb(bp);
 		}
-		blk_finish_plug(&plug);
-	} while (!kthread_should_stop());
+		xfs_bdstrat_cb(bp);
+	}
+	blk_finish_plug(&plug);
 
-	return 0;
+	return pinned;
 }
 
 /*
- * Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
+ * Write out a buffer list asynchronously.
+ *
+ * This will take the @buffer_list, write all non-locked and non-pinned buffers
+ * out and not wait for I/O completion on any of the buffers. This interface
+ * is only safely useable for callers that can track I/O completion by higher
+ * level means, e.g. AIL pushing as the @buffer_list is consumed in this
+ * function.
  */
 int
-xfs_flush_buftarg(
-	xfs_buftarg_t	*target,
-	int		wait)
+xfs_buf_delwri_submit_nowait(
+	struct list_head	*buffer_list)
 {
-	xfs_buf_t	*bp;
-	int		pincount = 0;
-	LIST_HEAD(tmp_list);
-	LIST_HEAD(wait_list);
-	struct blk_plug plug;
+	LIST_HEAD		(io_list);
+	return __xfs_buf_delwri_submit(buffer_list, &io_list, false);
+}
 
-	flush_workqueue(xfslogd_workqueue);
+/*
+ * Write out a buffer list synchronously.
+ *
+ * This will take the @buffer_list, write all buffers out and wait for I/O
+ * completion on all of the buffers. @buffer_list is consumed by the function,
+ * so callers must have some other way of tracking buffers if they require such
+ * functionality.
+ */
+int
+xfs_buf_delwri_submit(
+	struct list_head	*buffer_list)
+{
+	LIST_HEAD		(io_list);
+	int			error = 0, error2;
+	struct xfs_buf		*bp;
 
-	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+	__xfs_buf_delwri_submit(buffer_list, &io_list, true);
 
-	/*
-	 * Dropped the delayed write list lock, now walk the temporary list.
-	 * All I/O is issued async and then if we need to wait for completion
-	 * we do that after issuing all the IO.
-	 */
-	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	/* Wait for IO to complete. */
+	while (!list_empty(&io_list)) {
+		bp = list_first_entry(&io_list, struct xfs_buf, b_list);
 
-	blk_start_plug(&plug);
-	while (!list_empty(&tmp_list)) {
-		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-		ASSERT(target == bp->b_target);
 		list_del_init(&bp->b_list);
-		if (wait) {
-			bp->b_flags &= ~XBF_ASYNC;
-			list_add(&bp->b_list, &wait_list);
-		}
-		xfs_bdstrat_cb(bp);
-	}
-	blk_finish_plug(&plug);
-
-	if (wait) {
-		/* Wait for IO to complete. */
-		while (!list_empty(&wait_list)) {
-			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-			list_del_init(&bp->b_list);
-			xfs_buf_iowait(bp);
-			xfs_buf_relse(bp);
-		}
+		error2 = xfs_buf_iowait(bp);
+		xfs_buf_relse(bp);
+		if (!error)
+			error = error2;
 	}
 
-	return pincount;
+	return error;
 }
 
 int __init
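A minimal usage sketch of the new API introduced above (illustrative only, not part of the patch; xfs_example_writeback and its calling convention are assumptions). The delwri list lives on the caller's stack, and the list itself takes an extra b_hold reference for each queued buffer:

/*
 * Illustrative sketch, not part of the patch. Assumes each bp carries
 * its own reference (e.g. from xfs_buf_get); xfs_buf_delwri_queue()
 * adds the list's own b_hold reference when it queues a buffer.
 */
STATIC int
xfs_example_writeback(
	struct xfs_buf		**bufs,
	int			nbufs)
{
	LIST_HEAD		(buffer_list);
	int			i;

	for (i = 0; i < nbufs; i++) {
		struct xfs_buf	*bp = bufs[i];

		xfs_buf_lock(bp);		/* queueing requires the buffer lock */
		xfs_buf_delwri_queue(bp, &buffer_list);
		xfs_buf_relse(bp);		/* drop our lock and reference */
	}

	/* write everything out and wait; consumes buffer_list */
	return xfs_buf_delwri_submit(&buffer_list);
}

xfs_buf_delwri_submit_nowait() has the same shape but returns as soon as the I/O is issued, which is why only callers that track completion by other means, such as the AIL, can use it safely.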
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bf3be45f543..7083cf44d95f 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -49,8 +49,7 @@ typedef enum {
 #define XBF_MAPPED	(1 << 3) /* buffer mapped (b_addr valid) */
 #define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
 #define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI	(1 << 6) /* buffer has dirty pages */
-#define XBF_STALE	(1 << 7) /* buffer has been staled, do not find it */
+#define XBF_STALE	(1 << 6) /* buffer has been staled, do not find it */
 
 /* I/O hints for the BIO layer */
 #define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
@@ -65,7 +64,7 @@ typedef enum {
 /* flags used only internally */
 #define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
 #define _XBF_KMEM	(1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q	(1 << 22)/* buffer on delwri queue */
+#define _XBF_DELWRI_Q	(1 << 22)/* buffer on a delwri queue */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -76,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_MAPPED,		"MAPPED" }, \
 	{ XBF_ASYNC,		"ASYNC" }, \
 	{ XBF_DONE,		"DONE" }, \
-	{ XBF_DELWRI,		"DELWRI" }, \
 	{ XBF_STALE,		"STALE" }, \
 	{ XBF_SYNCIO,		"SYNCIO" }, \
 	{ XBF_FUA,		"FUA" }, \
@@ -88,10 +86,6 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
 
-typedef enum {
-	XBT_FORCE_FLUSH = 0,
-} xfs_buftarg_flags_t;
-
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct block_device	*bt_bdev;
@@ -101,12 +95,6 @@ typedef struct xfs_buftarg {
 	unsigned int		bt_sshift;
 	size_t			bt_smask;
 
-	/* per device delwri queue */
-	struct task_struct	*bt_task;
-	struct list_head	bt_delwri_queue;
-	spinlock_t		bt_delwri_lock;
-	unsigned long		bt_flags;
-
 	/* LRU control structures */
 	struct shrinker		bt_shrinker;
 	struct list_head	bt_lru;
@@ -150,7 +138,6 @@ typedef struct xfs_buf {
 	struct xfs_trans	*b_transp;
 	struct page		**b_pages;	/* array of page pointers */
 	struct page		*b_page_array[XB_PAGES]; /* inline pages */
-	unsigned long		b_queuetime;	/* time buffer was queued */
 	atomic_t		b_pin_count;	/* pin count */
 	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int		b_page_count;	/* size of page array */
@@ -220,24 +207,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
 extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
 
 /* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_queue(struct xfs_buf *);
-extern void xfs_buf_delwri_dequeue(struct xfs_buf *);
-extern void xfs_buf_delwri_promote(struct xfs_buf *);
+extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
+extern int xfs_buf_delwri_submit(struct list_head *);
+extern int xfs_buf_delwri_submit_nowait(struct list_head *);
 
 /* Buffer Daemon Setup Routines */
 extern int xfs_buf_init(void);
 extern void xfs_buf_terminate(void);
 
 #define XFS_BUF_ZEROFLAGS(bp) \
-	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
 			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
 
 void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
 #define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)
 
-#define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)
-
 #define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
 #define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
 #define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
@@ -287,7 +272,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
 
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
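With XBF_DELWRI gone, _XBF_DELWRI_Q is the only remaining delayed-write state, which is what makes the lazy dequeue protocol described in xfs_buf_delwri_queue() work. A sketch of the writer side (essentially what the reworked xfs_bwrite() above already does; xfs_example_write_now is a hypothetical name for illustration):

/*
 * Illustrative sketch of the lazy delwri dequeue protocol. A party
 * that writes a queued buffer synchronously only clears _XBF_DELWRI_Q;
 * the buffer stays on the owner's delwri list with the list's b_hold
 * reference, and __xfs_buf_delwri_submit() reaps it on the next
 * submission when it sees the flag is gone.
 */
STATIC int
xfs_example_write_now(
	struct xfs_buf		*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);

	xfs_bdstrat_cb(bp);
	return xfs_buf_iowait(bp);
}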
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3a0bc38f1859..fb20f384b566 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -418,7 +418,6 @@ xfs_buf_item_unpin(
 	if (freed && stale) {
 		ASSERT(bip->bli_flags & XFS_BLI_STALE);
 		ASSERT(xfs_buf_islocked(bp));
-		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
 		ASSERT(XFS_BUF_ISSTALE(bp));
 		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
 
@@ -469,34 +468,28 @@ xfs_buf_item_unpin(
 	}
 }
 
-/*
- * This is called to attempt to lock the buffer associated with this
- * buf log item. Don't sleep on the buffer lock. If we can't get
- * the lock right away, return 0. If we can get the lock, take a
- * reference to the buffer. If this is a delayed write buffer that
- * needs AIL help to be written back, invoke the pushbuf routine
- * rather than the normal success path.
- */
 STATIC uint
-xfs_buf_item_trylock(
-	struct xfs_log_item	*lip)
+xfs_buf_item_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
+	uint			rval = XFS_ITEM_SUCCESS;
 
 	if (xfs_buf_ispinned(bp))
 		return XFS_ITEM_PINNED;
 	if (!xfs_buf_trylock(bp))
 		return XFS_ITEM_LOCKED;
 
-	/* take a reference to the buffer. */
-	xfs_buf_hold(bp);
-
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-	trace_xfs_buf_item_trylock(bip);
-	if (XFS_BUF_ISDELAYWRITE(bp))
-		return XFS_ITEM_PUSHBUF;
-	return XFS_ITEM_SUCCESS;
+
+	trace_xfs_buf_item_push(bip);
+
+	if (!xfs_buf_delwri_queue(bp, buffer_list))
+		rval = XFS_ITEM_FLUSHING;
+	xfs_buf_unlock(bp);
+	return rval;
 }
 
 /*
@@ -609,48 +602,6 @@ xfs_buf_item_committed(
 	return lsn;
 }
 
-/*
- * The buffer is locked, but is not a delayed write buffer.
- */
-STATIC void
-xfs_buf_item_push(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
-	struct xfs_buf		*bp = bip->bli_buf;
-
-	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-	ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
-
-	trace_xfs_buf_item_push(bip);
-
-	xfs_buf_delwri_queue(bp);
-	xfs_buf_relse(bp);
-}
-
-/*
- * The buffer is locked and is a delayed write buffer. Promote the buffer
- * in the delayed write queue as the caller knows that they must invoke
- * the xfsbufd to get this buffer written. We have to unlock the buffer
- * to allow the xfsbufd to write it, too.
- */
-STATIC bool
-xfs_buf_item_pushbuf(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
-	struct xfs_buf		*bp = bip->bli_buf;
-
-	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-	ASSERT(XFS_BUF_ISDELAYWRITE(bp));
-
-	trace_xfs_buf_item_pushbuf(bip);
-
-	xfs_buf_delwri_promote(bp);
-	xfs_buf_relse(bp);
-	return true;
-}
-
 STATIC void
 xfs_buf_item_committing(
 	struct xfs_log_item	*lip,
@@ -666,11 +617,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_format	= xfs_buf_item_format,
 	.iop_pin	= xfs_buf_item_pin,
 	.iop_unpin	= xfs_buf_item_unpin,
-	.iop_trylock	= xfs_buf_item_trylock,
 	.iop_unlock	= xfs_buf_item_unlock,
 	.iop_committed	= xfs_buf_item_committed,
 	.iop_push	= xfs_buf_item_push,
-	.iop_pushbuf	= xfs_buf_item_pushbuf,
 	.iop_committing = xfs_buf_item_committing
 };
 
@@ -989,20 +938,27 @@ xfs_buf_iodone_callbacks(
 	 * If the write was asynchronous then no one will be looking for the
 	 * error.  Clear the error state and write the buffer out again.
 	 *
-	 * During sync or umount we'll write all pending buffers again
-	 * synchronous, which will catch these errors if they keep hanging
-	 * around.
+	 * XXX: This helps against transient write errors, but we need to find
+	 * a way to shut the filesystem down if the writes keep failing.
+	 *
+	 * In practice we'll shut the filesystem down soon, as non-transient
+	 * errors tend to affect the whole device and a failing log write
+	 * will make us give up. But we really ought to do better here.
 	 */
 	if (XFS_BUF_ISASYNC(bp)) {
+		ASSERT(bp->b_iodone != NULL);
+
+		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+
 		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
 
 		if (!XFS_BUF_ISSTALE(bp)) {
-			xfs_buf_delwri_queue(bp);
-			XFS_BUF_DONE(bp);
+			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+			xfs_bdstrat_cb(bp);
+		} else {
+			xfs_buf_relse(bp);
 		}
-		ASSERT(bp->b_iodone != NULL);
-		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-		xfs_buf_relse(bp);
+
 		return;
 	}
 
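For context, a simplified sketch of how the pusher side consumes the new ->iop_push() contract (an assumption for illustration; the real consumer is the reworked xfsaild loop in xfs_trans_ail.c, which is part of this patch but not shown in this excerpt):

/*
 * Simplified sketch, not the actual xfsaild code: push AIL items into
 * an on-stack delwri list, then issue all the queued I/O at once
 * without waiting for completion.
 */
STATIC void
xfs_example_push_ail(
	struct xfs_ail		*ailp,
	struct xfs_log_item	**items,
	int			nitems)
{
	LIST_HEAD		(buffer_list);
	int			i;

	spin_lock(&ailp->xa_lock);
	for (i = 0; i < nitems; i++) {
		switch (items[i]->li_ops->iop_push(items[i], &buffer_list)) {
		case XFS_ITEM_SUCCESS:		/* queued on buffer_list */
		case XFS_ITEM_FLUSHING:		/* flush in progress; wait */
		case XFS_ITEM_PINNED:		/* needs a log force */
		case XFS_ITEM_LOCKED:		/* retry later */
			break;
		}
	}
	spin_unlock(&ailp->xa_lock);

	/* async submission; AIL completion callbacks do the tracking */
	xfs_buf_delwri_submit_nowait(&buffer_list);
}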
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 53757d83e4f6..65b8aa37622e 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1005,39 +1005,6 @@ xfs_dqlock2(
 	}
 }
 
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_dqflock_pushbuf_wait(
-	xfs_dquot_t	*dqp)
-{
-	xfs_mount_t	*mp = dqp->q_mount;
-	xfs_buf_t	*bp;
-
-	/*
-	 * Check to see if the dquot has been flushed delayed
-	 * write. If so, grab its buffer and send it
-	 * out immediately. We'll be able to acquire
-	 * the flush lock when the I/O completes.
-	 */
-	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
-			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-	if (!bp)
-		goto out_lock;
-
-	if (XFS_BUF_ISDELAYWRITE(bp)) {
-		if (xfs_buf_ispinned(bp))
-			xfs_log_force(mp, 0);
-		xfs_buf_delwri_promote(bp);
-		wake_up_process(bp->b_target->bt_task);
-	}
-	xfs_buf_relse(bp);
-out_lock:
-	xfs_dqflock(dqp);
-}
-
 int __init
 xfs_qm_init(void)
 {
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 5f2a2f2c0c5b..7d20af27346d 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -152,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 extern void		xfs_qm_dqput(xfs_dquot_t *);
 
 extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
-extern void		xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
 
 static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
 {
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 8d8295814272..9c5d58d24e54 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -108,46 +108,6 @@ xfs_qm_dquot_logitem_unpin(
 		wake_up(&dqp->q_pinwait);
 }
 
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-	struct xfs_buf		*bp = NULL;
-	int			error;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(!completion_done(&dqp->q_flush));
-	ASSERT(atomic_read(&dqp->q_pincount) == 0);
-
-	/*
-	 * Since we were able to lock the dquot's flush lock and
-	 * we found it on the AIL, the dquot must be dirty. This
-	 * is because the dquot is removed from the AIL while still
-	 * holding the flush lock in xfs_dqflush_done(). Thus, if
-	 * we found it in the AIL and were able to obtain the flush
-	 * lock without sleeping, then there must not have been
-	 * anyone in the process of flushing the dquot.
-	 */
-	error = xfs_qm_dqflush(dqp, &bp);
-	if (error) {
-		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
-			__func__, error, dqp);
-		goto out_unlock;
-	}
-
-	xfs_buf_delwri_queue(bp);
-	xfs_buf_relse(bp);
-out_unlock:
-	xfs_dqunlock(dqp);
-}
-
 STATIC xfs_lsn_t
 xfs_qm_dquot_logitem_committed(
 	struct xfs_log_item	*lip,
@@ -179,67 +139,15 @@ xfs_qm_dqunpin_wait(
 	wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
 }
 
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL lock is a
- * spinlock.
- */
-STATIC bool
-xfs_qm_dquot_logitem_pushbuf(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
-	struct xfs_dquot	*dqp = qlip->qli_dquot;
-	struct xfs_buf		*bp;
-	bool			ret = true;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	/*
-	 * If flushlock isn't locked anymore, chances are that the
-	 * inode flush completed and the inode was taken off the AIL.
-	 * So, just get out.
-	 */
-	if (completion_done(&dqp->q_flush) ||
-	    !(lip->li_flags & XFS_LI_IN_AIL)) {
-		xfs_dqunlock(dqp);
-		return true;
-	}
-
-	bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
-			dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-	xfs_dqunlock(dqp);
-	if (!bp)
-		return true;
-	if (XFS_BUF_ISDELAYWRITE(bp))
-		xfs_buf_delwri_promote(bp);
-	if (xfs_buf_ispinned(bp))
-		ret = false;
-	xfs_buf_relse(bp);
-	return ret;
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item. Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping. If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
 STATIC uint
-xfs_qm_dquot_logitem_trylock(
-	struct xfs_log_item	*lip)
+xfs_qm_dquot_logitem_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
 	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+	struct xfs_buf		*bp = NULL;
+	uint			rval = XFS_ITEM_SUCCESS;
+	int			error;
 
 	if (atomic_read(&dqp->q_pincount) > 0)
 		return XFS_ITEM_PINNED;
@@ -252,20 +160,36 @@ xfs_qm_dquot_logitem_push(
 	 * taking the quota lock.
 	 */
 	if (atomic_read(&dqp->q_pincount) > 0) {
-		xfs_dqunlock(dqp);
-		return XFS_ITEM_PINNED;
+		rval = XFS_ITEM_PINNED;
+		goto out_unlock;
 	}
 
+	/*
+	 * Someone else is already flushing the dquot. Nothing we can do
+	 * here but wait for the flush to finish and remove the item from
+	 * the AIL.
+	 */
 	if (!xfs_dqflock_nowait(dqp)) {
-		/*
-		 * dquot has already been flushed to the backing buffer,
-		 * leave it locked, pushbuf routine will unlock it.
-		 */
-		return XFS_ITEM_PUSHBUF;
+		rval = XFS_ITEM_FLUSHING;
+		goto out_unlock;
+	}
+
+	spin_unlock(&lip->li_ailp->xa_lock);
+
+	error = xfs_qm_dqflush(dqp, &bp);
+	if (error) {
+		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+			__func__, error, dqp);
+	} else {
+		if (!xfs_buf_delwri_queue(bp, buffer_list))
+			rval = XFS_ITEM_FLUSHING;
+		xfs_buf_relse(bp);
 	}
 
-	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-	return XFS_ITEM_SUCCESS;
+	spin_lock(&lip->li_ailp->xa_lock);
+out_unlock:
+	xfs_dqunlock(dqp);
+	return rval;
 }
 
 /*
@@ -316,11 +240,9 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
 	.iop_format	= xfs_qm_dquot_logitem_format,
 	.iop_pin	= xfs_qm_dquot_logitem_pin,
 	.iop_unpin	= xfs_qm_dquot_logitem_unpin,
-	.iop_trylock	= xfs_qm_dquot_logitem_trylock,
 	.iop_unlock	= xfs_qm_dquot_logitem_unlock,
 	.iop_committed	= xfs_qm_dquot_logitem_committed,
 	.iop_push	= xfs_qm_dquot_logitem_push,
-	.iop_pushbuf	= xfs_qm_dquot_logitem_pushbuf,
 	.iop_committing = xfs_qm_dquot_logitem_committing
 };
 
@@ -415,11 +337,13 @@ xfs_qm_qoff_logitem_unpin(
 }
 
 /*
- * Quotaoff items have no locking, so just return success.
+ * There isn't much you can do to push a quotaoff item. It is simply
+ * stuck waiting for the log to be flushed to disk.
 */
 STATIC uint
-xfs_qm_qoff_logitem_trylock(
-	struct xfs_log_item	*lip)
+xfs_qm_qoff_logitem_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
 	return XFS_ITEM_LOCKED;
 }
@@ -446,17 +370,6 @@ xfs_qm_qoff_logitem_committed(
 	return lsn;
 }
 
-/*
- * There isn't much you can do to push on an quotaoff item. It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
-	struct xfs_log_item	*lip)
-{
-}
-
-
 STATIC xfs_lsn_t
 xfs_qm_qoffend_logitem_committed(
 	struct xfs_log_item	*lip,
@@ -504,7 +417,6 @@ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 	.iop_format	= xfs_qm_qoff_logitem_format,
 	.iop_pin	= xfs_qm_qoff_logitem_pin,
 	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
-	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
 	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
 	.iop_committed	= xfs_qm_qoffend_logitem_committed,
 	.iop_push	= xfs_qm_qoff_logitem_push,
@@ -519,7 +431,6 @@ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
 	.iop_format	= xfs_qm_qoff_logitem_format,
 	.iop_pin	= xfs_qm_qoff_logitem_pin,
 	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
-	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
 	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
 	.iop_committed	= xfs_qm_qoff_logitem_committed,
 	.iop_push	= xfs_qm_qoff_logitem_push,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 35c2aff38b20..9549ef179e06 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -147,22 +147,20 @@ xfs_efi_item_unpin(
 }
 
 /*
- * Efi items have no locking or pushing. However, since EFIs are
- * pulled from the AIL when their corresponding EFDs are committed
- * to disk, their situation is very similar to being pinned. Return
- * XFS_ITEM_PINNED so that the caller will eventually flush the log.
- * This should help in getting the EFI out of the AIL.
+ * Efi items have no locking or pushing. However, since EFIs are pulled from
+ * the AIL when their corresponding EFDs are committed to disk, their situation
+ * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
+ * will eventually flush the log. This should help in getting the EFI out of
+ * the AIL.
 */
 STATIC uint
-xfs_efi_item_trylock(
-	struct xfs_log_item	*lip)
+xfs_efi_item_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
 	return XFS_ITEM_PINNED;
 }
 
-/*
- * Efi items have no locking, so just return.
- */
 STATIC void
 xfs_efi_item_unlock(
 	struct xfs_log_item	*lip)
@@ -190,17 +188,6 @@ xfs_efi_item_committed(
 }
 
 /*
- * There isn't much you can do to push on an efi item. It is simply
- * stuck waiting for all of its corresponding efd items to be
- * committed to disk.
- */
-STATIC void
-xfs_efi_item_push(
-	struct xfs_log_item	*lip)
-{
-}
-
-/*
  * The EFI dependency tracking op doesn't do squat. It can't because
  * it doesn't know where the free extent is coming from. The dependency
  * tracking has to be handled by the "enclosing" metadata object. For
@@ -222,7 +209,6 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
 	.iop_format	= xfs_efi_item_format,
 	.iop_pin	= xfs_efi_item_pin,
 	.iop_unpin	= xfs_efi_item_unpin,
-	.iop_trylock	= xfs_efi_item_trylock,
 	.iop_unlock	= xfs_efi_item_unlock,
 	.iop_committed	= xfs_efi_item_committed,
 	.iop_push	= xfs_efi_item_push,
@@ -404,19 +390,17 @@ xfs_efd_item_unpin(
 }
 
 /*
- * Efd items have no locking, so just return success.
+ * There isn't much you can do to push on an efd item. It is simply stuck
+ * waiting for the log to be flushed to disk.
 */
 STATIC uint
-xfs_efd_item_trylock(
-	struct xfs_log_item	*lip)
+xfs_efd_item_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
-	return XFS_ITEM_LOCKED;
+	return XFS_ITEM_PINNED;
 }
 
-/*
- * Efd items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
 STATIC void
 xfs_efd_item_unlock(
 	struct xfs_log_item	*lip)
@@ -451,16 +435,6 @@ xfs_efd_item_committed(
 }
 
 /*
- * There isn't much you can do to push on an efd item. It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_efd_item_push(
-	struct xfs_log_item	*lip)
-{
-}
-
-/*
  * The EFD dependency tracking op doesn't do squat. It can't because
  * it doesn't know where the free extent is coming from. The dependency
  * tracking has to be handled by the "enclosing" metadata object. For
@@ -482,7 +456,6 @@ static const struct xfs_item_ops xfs_efd_item_ops = {
 	.iop_format	= xfs_efd_item_format,
 	.iop_pin	= xfs_efd_item_pin,
 	.iop_unpin	= xfs_efd_item_unpin,
-	.iop_trylock	= xfs_efd_item_trylock,
 	.iop_unlock	= xfs_efd_item_unlock,
 	.iop_committed	= xfs_efd_item_committed,
 	.iop_push	= xfs_efd_item_push,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0fa987dea242..acd846d808b2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2347,11 +2347,11 @@ cluster_corrupt_out:
 	 */
 	rcu_read_unlock();
 	/*
-	 * Clean up the buffer. If it was B_DELWRI, just release it --
+	 * Clean up the buffer. If it was delwri, just release it --
 	 * brelse can handle it with no problems. If not, shut down the
 	 * filesystem before releasing the buffer.
 	 */
-	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
 	if (bufwasdelwri)
 		xfs_buf_relse(bp);
 
@@ -2685,27 +2685,6 @@ corrupt_out:
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
-void
-xfs_promote_inode(
-	struct xfs_inode	*ip)
-{
-	struct xfs_buf		*bp;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-
-	bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
-			ip->i_imap.im_len, XBF_TRYLOCK);
-	if (!bp)
-		return;
-
-	if (XFS_BUF_ISDELAYWRITE(bp)) {
-		xfs_buf_delwri_promote(bp);
-		wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
-	}
-
-	xfs_buf_relse(bp);
-}
-
 /*
  * Return a pointer to the extent record at file index idx.
  */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a2fa79ae410f..f0e252f384f9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -530,7 +530,6 @@ int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 void		xfs_iunpin_wait(xfs_inode_t *);
 int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
-void		xfs_promote_inode(struct xfs_inode *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d3601ab75dd3..8aaebb2f9efa 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -480,25 +480,16 @@ xfs_inode_item_unpin(
 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
 }
 
-/*
- * This is called to attempt to lock the inode associated with this
- * inode log item, in preparation for the push routine which does the actual
- * iflush. Don't sleep on the inode lock or the flush lock.
- *
- * If the flush lock is already held, indicating that the inode has
- * been or is in the process of being flushed, then (ideally) we'd like to
- * see if the inode's buffer is still incore, and if so give it a nudge.
- * We delay doing so until the pushbuf routine, though, to avoid holding
- * the AIL lock across a call to the blackhole which is the buffer cache.
- * Also we don't want to sleep in any device strategy routines, which can happen
- * if we do the subsequent bawrite in here.
- */
 STATIC uint
-xfs_inode_item_trylock(
-	struct xfs_log_item	*lip)
+xfs_inode_item_push(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
+	struct xfs_buf		*bp = NULL;
+	uint			rval = XFS_ITEM_SUCCESS;
+	int			error;
 
 	if (xfs_ipincount(ip) > 0)
 		return XFS_ITEM_PINNED;
@@ -511,34 +502,45 @@ xfs_inode_item_push(
 	 * taking the ilock.
 	 */
 	if (xfs_ipincount(ip) > 0) {
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-		return XFS_ITEM_PINNED;
+		rval = XFS_ITEM_PINNED;
+		goto out_unlock;
 	}
 
+	/*
+	 * Someone else is already flushing the inode. Nothing we can do
+	 * here but wait for the flush to finish and remove the item from
+	 * the AIL.
+	 */
 	if (!xfs_iflock_nowait(ip)) {
-		/*
-		 * inode has already been flushed to the backing buffer,
-		 * leave it locked in shared mode, pushbuf routine will
-		 * unlock it.
-		 */
-		return XFS_ITEM_PUSHBUF;
+		rval = XFS_ITEM_FLUSHING;
+		goto out_unlock;
 	}
 
-	/* Stale items should force out the iclog */
+	/*
+	 * Stale inode items should force out the iclog.
+	 */
 	if (ip->i_flags & XFS_ISTALE) {
 		xfs_ifunlock(ip);
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		return XFS_ITEM_PINNED;
 	}
 
-#ifdef DEBUG
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		ASSERT(iip->ili_fields != 0);
-		ASSERT(iip->ili_logged == 0);
-		ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
+	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
+
+	spin_unlock(&lip->li_ailp->xa_lock);
+
+	error = xfs_iflush(ip, &bp);
+	if (!error) {
+		if (!xfs_buf_delwri_queue(bp, buffer_list))
+			rval = XFS_ITEM_FLUSHING;
+		xfs_buf_relse(bp);
 	}
-#endif
-	return XFS_ITEM_SUCCESS;
+
+	spin_lock(&lip->li_ailp->xa_lock);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return rval;
 }
 
 /*
@@ -623,92 +625,6 @@ xfs_inode_item_committed(
 }
 
 /*
- * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
- * failed to get the inode flush lock but did get the inode locked SHARED.
- * Here we're trying to see if the inode buffer is incore, and if so whether it's
- * marked delayed write. If that's the case, we'll promote it and that will
- * allow the caller to write the buffer by triggering the xfsbufd to run.
- */
-STATIC bool
-xfs_inode_item_pushbuf(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
-	struct xfs_inode	*ip = iip->ili_inode;
-	struct xfs_buf		*bp;
-	bool			ret = true;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
-
-	/*
-	 * If a flush is not in progress anymore, chances are that the
645 | * inode was taken off the AIL. So, just get out. | ||
646 | */ | ||
647 | if (!xfs_isiflocked(ip) || | ||
648 | !(lip->li_flags & XFS_LI_IN_AIL)) { | ||
649 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
650 | return true; | ||
651 | } | ||
652 | |||
653 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, | ||
654 | iip->ili_format.ilf_len, XBF_TRYLOCK); | ||
655 | |||
656 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
657 | if (!bp) | ||
658 | return true; | ||
659 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
660 | xfs_buf_delwri_promote(bp); | ||
661 | if (xfs_buf_ispinned(bp)) | ||
662 | ret = false; | ||
663 | xfs_buf_relse(bp); | ||
664 | return ret; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * This is called to asynchronously write the inode associated with this | ||
669 | * inode log item out to disk. The inode will already have been locked by | ||
670 | * a successful call to xfs_inode_item_trylock(). | ||
671 | */ | ||
672 | STATIC void | ||
673 | xfs_inode_item_push( | ||
674 | struct xfs_log_item *lip) | ||
675 | { | ||
676 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
677 | struct xfs_inode *ip = iip->ili_inode; | ||
678 | struct xfs_buf *bp = NULL; | ||
679 | int error; | ||
680 | |||
681 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | ||
682 | ASSERT(xfs_isiflocked(ip)); | ||
683 | |||
684 | /* | ||
685 | * Since we were able to lock the inode's flush lock and | ||
686 | * we found it on the AIL, the inode must be dirty. This | ||
687 | * is because the inode is removed from the AIL while still | ||
688 | * holding the flush lock in xfs_iflush_done(). Thus, if | ||
689 | * we found it in the AIL and were able to obtain the flush | ||
690 | * lock without sleeping, then there must not have been | ||
691 | * anyone in the process of flushing the inode. | ||
692 | */ | ||
693 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); | ||
694 | |||
695 | /* | ||
696 | * Push the inode to its backing buffer. This will not remove the | ||
697 | * inode from the AIL - a further push will be required to trigger a | ||
698 | * buffer push. However, this allows all the dirty inodes to be pushed | ||
699 | * to the buffer before it is pushed to disk. The buffer IO completion | ||
700 | * will pull the inode from the AIL, mark it clean and unlock the flush | ||
701 | * lock. | ||
702 | */ | ||
703 | error = xfs_iflush(ip, &bp); | ||
704 | if (!error) { | ||
705 | xfs_buf_delwri_queue(bp); | ||
706 | xfs_buf_relse(bp); | ||
707 | } | ||
708 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
709 | } | ||
710 | |||
711 | /* | ||
712 | * XXX rcc - this one really has to do something. Probably needs | 628 | * XXX rcc - this one really has to do something. Probably needs |
713 | * to stamp in a new field in the incore inode. | 629 | * to stamp in a new field in the incore inode. |
714 | */ | 630 | */ |
@@ -728,11 +644,9 @@ static const struct xfs_item_ops xfs_inode_item_ops = { | |||
728 | .iop_format = xfs_inode_item_format, | 644 | .iop_format = xfs_inode_item_format, |
729 | .iop_pin = xfs_inode_item_pin, | 645 | .iop_pin = xfs_inode_item_pin, |
730 | .iop_unpin = xfs_inode_item_unpin, | 646 | .iop_unpin = xfs_inode_item_unpin, |
731 | .iop_trylock = xfs_inode_item_trylock, | ||
732 | .iop_unlock = xfs_inode_item_unlock, | 647 | .iop_unlock = xfs_inode_item_unlock, |
733 | .iop_committed = xfs_inode_item_committed, | 648 | .iop_committed = xfs_inode_item_committed, |
734 | .iop_push = xfs_inode_item_push, | 649 | .iop_push = xfs_inode_item_push, |
735 | .iop_pushbuf = xfs_inode_item_pushbuf, | ||
736 | .iop_committing = xfs_inode_item_committing | 650 | .iop_committing = xfs_inode_item_committing |
737 | }; | 651 | }; |
738 | 652 | ||
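
The rewritten xfs_inode_item_push() above collapses the old trylock/push/pushbuf triple into a single method that stages its buffer on the caller's list. Other item types follow the same shape; a condensed sketch for a hypothetical item type (the my_item_* helpers are placeholders for the type's own pin/lock/flush primitives, not real XFS functions):

    STATIC uint
    xfs_myitem_push(
        struct xfs_log_item  *lip,
        struct list_head     *buffer_list)
    {
        struct xfs_buf       *bp = NULL;
        uint                 rval = XFS_ITEM_SUCCESS;

        if (my_item_is_pinned(lip))
            return XFS_ITEM_PINNED;     /* caller forces the log */
        if (!my_item_trylock(lip))
            return XFS_ITEM_LOCKED;     /* retry on a later pass */
        if (!my_item_tryflock(lip)) {
            rval = XFS_ITEM_FLUSHING;   /* I/O already in flight */
            goto out_unlock;
        }

        /* Flushing may block, so drop the AIL lock around it. */
        spin_unlock(&lip->li_ailp->xa_lock);
        if (!my_item_flush(lip, &bp)) {
            if (!xfs_buf_delwri_queue(bp, buffer_list))
                rval = XFS_ITEM_FLUSHING;
            xfs_buf_relse(bp);
        }
        spin_lock(&lip->li_ailp->xa_lock);
    out_unlock:
        my_item_unlock(lip);
        return rval;
    }
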
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 8ecad5bad66c..5e864a9c0ccf 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -2103,6 +2103,7 @@ xlog_recover_do_dquot_buffer( | |||
2103 | STATIC int | 2103 | STATIC int |
2104 | xlog_recover_buffer_pass2( | 2104 | xlog_recover_buffer_pass2( |
2105 | xlog_t *log, | 2105 | xlog_t *log, |
2106 | struct list_head *buffer_list, | ||
2106 | xlog_recover_item_t *item) | 2107 | xlog_recover_item_t *item) |
2107 | { | 2108 | { |
2108 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2109 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
@@ -2173,7 +2174,7 @@ xlog_recover_buffer_pass2( | |||
2173 | } else { | 2174 | } else { |
2174 | ASSERT(bp->b_target->bt_mount == mp); | 2175 | ASSERT(bp->b_target->bt_mount == mp); |
2175 | bp->b_iodone = xlog_recover_iodone; | 2176 | bp->b_iodone = xlog_recover_iodone; |
2176 | xfs_buf_delwri_queue(bp); | 2177 | xfs_buf_delwri_queue(bp, buffer_list); |
2177 | } | 2178 | } |
2178 | 2179 | ||
2179 | xfs_buf_relse(bp); | 2180 | xfs_buf_relse(bp); |
@@ -2183,6 +2184,7 @@ xlog_recover_buffer_pass2( | |||
2183 | STATIC int | 2184 | STATIC int |
2184 | xlog_recover_inode_pass2( | 2185 | xlog_recover_inode_pass2( |
2185 | xlog_t *log, | 2186 | xlog_t *log, |
2187 | struct list_head *buffer_list, | ||
2186 | xlog_recover_item_t *item) | 2188 | xlog_recover_item_t *item) |
2187 | { | 2189 | { |
2188 | xfs_inode_log_format_t *in_f; | 2190 | xfs_inode_log_format_t *in_f; |
@@ -2436,7 +2438,7 @@ xlog_recover_inode_pass2( | |||
2436 | write_inode_buffer: | 2438 | write_inode_buffer: |
2437 | ASSERT(bp->b_target->bt_mount == mp); | 2439 | ASSERT(bp->b_target->bt_mount == mp); |
2438 | bp->b_iodone = xlog_recover_iodone; | 2440 | bp->b_iodone = xlog_recover_iodone; |
2439 | xfs_buf_delwri_queue(bp); | 2441 | xfs_buf_delwri_queue(bp, buffer_list); |
2440 | xfs_buf_relse(bp); | 2442 | xfs_buf_relse(bp); |
2441 | error: | 2443 | error: |
2442 | if (need_free) | 2444 | if (need_free) |
@@ -2477,6 +2479,7 @@ xlog_recover_quotaoff_pass1( | |||
2477 | STATIC int | 2479 | STATIC int |
2478 | xlog_recover_dquot_pass2( | 2480 | xlog_recover_dquot_pass2( |
2479 | xlog_t *log, | 2481 | xlog_t *log, |
2482 | struct list_head *buffer_list, | ||
2480 | xlog_recover_item_t *item) | 2483 | xlog_recover_item_t *item) |
2481 | { | 2484 | { |
2482 | xfs_mount_t *mp = log->l_mp; | 2485 | xfs_mount_t *mp = log->l_mp; |
@@ -2558,7 +2561,7 @@ xlog_recover_dquot_pass2( | |||
2558 | ASSERT(dq_f->qlf_size == 2); | 2561 | ASSERT(dq_f->qlf_size == 2); |
2559 | ASSERT(bp->b_target->bt_mount == mp); | 2562 | ASSERT(bp->b_target->bt_mount == mp); |
2560 | bp->b_iodone = xlog_recover_iodone; | 2563 | bp->b_iodone = xlog_recover_iodone; |
2561 | xfs_buf_delwri_queue(bp); | 2564 | xfs_buf_delwri_queue(bp, buffer_list); |
2562 | xfs_buf_relse(bp); | 2565 | xfs_buf_relse(bp); |
2563 | 2566 | ||
2564 | return (0); | 2567 | return (0); |
@@ -2712,21 +2715,22 @@ STATIC int | |||
2712 | xlog_recover_commit_pass2( | 2715 | xlog_recover_commit_pass2( |
2713 | struct log *log, | 2716 | struct log *log, |
2714 | struct xlog_recover *trans, | 2717 | struct xlog_recover *trans, |
2718 | struct list_head *buffer_list, | ||
2715 | xlog_recover_item_t *item) | 2719 | xlog_recover_item_t *item) |
2716 | { | 2720 | { |
2717 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | 2721 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
2718 | 2722 | ||
2719 | switch (ITEM_TYPE(item)) { | 2723 | switch (ITEM_TYPE(item)) { |
2720 | case XFS_LI_BUF: | 2724 | case XFS_LI_BUF: |
2721 | return xlog_recover_buffer_pass2(log, item); | 2725 | return xlog_recover_buffer_pass2(log, buffer_list, item); |
2722 | case XFS_LI_INODE: | 2726 | case XFS_LI_INODE: |
2723 | return xlog_recover_inode_pass2(log, item); | 2727 | return xlog_recover_inode_pass2(log, buffer_list, item); |
2724 | case XFS_LI_EFI: | 2728 | case XFS_LI_EFI: |
2725 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | 2729 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
2726 | case XFS_LI_EFD: | 2730 | case XFS_LI_EFD: |
2727 | return xlog_recover_efd_pass2(log, item); | 2731 | return xlog_recover_efd_pass2(log, item); |
2728 | case XFS_LI_DQUOT: | 2732 | case XFS_LI_DQUOT: |
2729 | return xlog_recover_dquot_pass2(log, item); | 2733 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
2730 | case XFS_LI_QUOTAOFF: | 2734 | case XFS_LI_QUOTAOFF: |
2731 | /* nothing to do in pass2 */ | 2735 | /* nothing to do in pass2 */ |
2732 | return 0; | 2736 | return 0; |
@@ -2750,8 +2754,9 @@ xlog_recover_commit_trans( | |||
2750 | struct xlog_recover *trans, | 2754 | struct xlog_recover *trans, |
2751 | int pass) | 2755 | int pass) |
2752 | { | 2756 | { |
2753 | int error = 0; | 2757 | int error = 0, error2; |
2754 | xlog_recover_item_t *item; | 2758 | xlog_recover_item_t *item; |
2759 | LIST_HEAD (buffer_list); | ||
2755 | 2760 | ||
2756 | hlist_del(&trans->r_list); | 2761 | hlist_del(&trans->r_list); |
2757 | 2762 | ||
@@ -2760,16 +2765,27 @@ xlog_recover_commit_trans( | |||
2760 | return error; | 2765 | return error; |
2761 | 2766 | ||
2762 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 2767 | list_for_each_entry(item, &trans->r_itemq, ri_list) { |
2763 | if (pass == XLOG_RECOVER_PASS1) | 2768 | switch (pass) { |
2769 | case XLOG_RECOVER_PASS1: | ||
2764 | error = xlog_recover_commit_pass1(log, trans, item); | 2770 | error = xlog_recover_commit_pass1(log, trans, item); |
2765 | else | 2771 | break; |
2766 | error = xlog_recover_commit_pass2(log, trans, item); | 2772 | case XLOG_RECOVER_PASS2: |
2773 | error = xlog_recover_commit_pass2(log, trans, | ||
2774 | &buffer_list, item); | ||
2775 | break; | ||
2776 | default: | ||
2777 | ASSERT(0); | ||
2778 | } | ||
2779 | |||
2767 | if (error) | 2780 | if (error) |
2768 | return error; | 2781 | goto out; |
2769 | } | 2782 | } |
2770 | 2783 | ||
2771 | xlog_recover_free_trans(trans); | 2784 | xlog_recover_free_trans(trans); |
2772 | return 0; | 2785 | |
2786 | out: | ||
2787 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
2788 | return error ? error : error2; | ||
2773 | } | 2789 | } |
2774 | 2790 | ||
2775 | STATIC int | 2791 | STATIC int |
@@ -3639,11 +3655,8 @@ xlog_do_recover( | |||
3639 | * First replay the images in the log. | 3655 | * First replay the images in the log. |
3640 | */ | 3656 | */ |
3641 | error = xlog_do_log_recovery(log, head_blk, tail_blk); | 3657 | error = xlog_do_log_recovery(log, head_blk, tail_blk); |
3642 | if (error) { | 3658 | if (error) |
3643 | return error; | 3659 | return error; |
3644 | } | ||
3645 | |||
3646 | xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1); | ||
3647 | 3660 | ||
3648 | /* | 3661 | /* |
3649 | * If IO errors happened during recovery, bail out. | 3662 | * If IO errors happened during recovery, bail out. |
@@ -3670,7 +3683,6 @@ xlog_do_recover( | |||
3670 | bp = xfs_getsb(log->l_mp, 0); | 3683 | bp = xfs_getsb(log->l_mp, 0); |
3671 | XFS_BUF_UNDONE(bp); | 3684 | XFS_BUF_UNDONE(bp); |
3672 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3685 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
3673 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
3674 | XFS_BUF_READ(bp); | 3686 | XFS_BUF_READ(bp); |
3675 | XFS_BUF_UNASYNC(bp); | 3687 | XFS_BUF_UNASYNC(bp); |
3676 | xfsbdstrat(log->l_mp, bp); | 3688 | xfsbdstrat(log->l_mp, bp); |
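
All three pass2 handlers now feed a single transaction-local buffer list, and xlog_recover_commit_trans() submits it exactly once instead of relying on a buftarg-wide flush afterwards. Stripped of the item-type dispatch, the idiom looks roughly like this (replay_one_item is a placeholder for the pass2 handlers above):

    STATIC int
    replay_items(
        struct xlog_recover  *trans)
    {
        LIST_HEAD            (buffer_list);
        xlog_recover_item_t  *item;
        int                  error = 0, error2;

        list_for_each_entry(item, &trans->r_itemq, ri_list) {
            error = replay_one_item(item, &buffer_list);
            if (error)
                break;      /* still submit what was queued */
        }

        /*
         * Submit unconditionally: queued buffers hold references
         * and must be written out (or released) either way.
         */
        error2 = xfs_buf_delwri_submit(&buffer_list);
        return error ? error : error2;
    }
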
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 95aecf52475d..755a9bd749d0 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -65,7 +65,8 @@ STATIC int | |||
65 | xfs_qm_dquot_walk( | 65 | xfs_qm_dquot_walk( |
66 | struct xfs_mount *mp, | 66 | struct xfs_mount *mp, |
67 | int type, | 67 | int type, |
68 | int (*execute)(struct xfs_dquot *dqp)) | 68 | int (*execute)(struct xfs_dquot *dqp, void *data), |
69 | void *data) | ||
69 | { | 70 | { |
70 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 71 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 72 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
@@ -97,7 +98,7 @@ restart: | |||
97 | 98 | ||
98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; | 99 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; |
99 | 100 | ||
100 | error = execute(batch[i]); | 101 | error = execute(batch[i], data); |
101 | if (error == EAGAIN) { | 102 | if (error == EAGAIN) { |
102 | skipped++; | 103 | skipped++; |
103 | continue; | 104 | continue; |
@@ -129,7 +130,8 @@ restart: | |||
129 | */ | 130 | */ |
130 | STATIC int | 131 | STATIC int |
131 | xfs_qm_dqpurge( | 132 | xfs_qm_dqpurge( |
132 | struct xfs_dquot *dqp) | 133 | struct xfs_dquot *dqp, |
134 | void *data) | ||
133 | { | 135 | { |
134 | struct xfs_mount *mp = dqp->q_mount; | 136 | struct xfs_mount *mp = dqp->q_mount; |
135 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 137 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
@@ -153,21 +155,7 @@ xfs_qm_dqpurge( | |||
153 | 155 | ||
154 | dqp->dq_flags |= XFS_DQ_FREEING; | 156 | dqp->dq_flags |= XFS_DQ_FREEING; |
155 | 157 | ||
156 | /* | 158 | xfs_dqflock(dqp); |
157 | * If we're turning off quotas, we have to make sure that, for | ||
158 | * example, we don't delete quota disk blocks while dquots are | ||
159 | * in the process of getting written to those disk blocks. | ||
160 | * This dquot might well be on AIL, and we can't leave it there | ||
161 | * if we're turning off quotas. Basically, we need this flush | ||
162 | * lock, and are willing to block on it. | ||
163 | */ | ||
164 | if (!xfs_dqflock_nowait(dqp)) { | ||
165 | /* | ||
166 | * Block on the flush lock after nudging dquot buffer, | ||
167 | * if it is incore. | ||
168 | */ | ||
169 | xfs_dqflock_pushbuf_wait(dqp); | ||
170 | } | ||
171 | 159 | ||
172 | /* | 160 | /* |
173 | * If we are turning this type of quotas off, we don't care | 161 | * If we are turning this type of quotas off, we don't care |
@@ -231,11 +219,11 @@ xfs_qm_dqpurge_all( | |||
231 | uint flags) | 219 | uint flags) |
232 | { | 220 | { |
233 | if (flags & XFS_QMOPT_UQUOTA) | 221 | if (flags & XFS_QMOPT_UQUOTA) |
234 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge); | 222 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); |
235 | if (flags & XFS_QMOPT_GQUOTA) | 223 | if (flags & XFS_QMOPT_GQUOTA) |
236 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge); | 224 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
237 | if (flags & XFS_QMOPT_PQUOTA) | 225 | if (flags & XFS_QMOPT_PQUOTA) |
238 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge); | 226 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); |
239 | } | 227 | } |
240 | 228 | ||
241 | /* | 229 | /* |
@@ -876,15 +864,16 @@ xfs_qm_reset_dqcounts( | |||
876 | 864 | ||
877 | STATIC int | 865 | STATIC int |
878 | xfs_qm_dqiter_bufs( | 866 | xfs_qm_dqiter_bufs( |
879 | xfs_mount_t *mp, | 867 | struct xfs_mount *mp, |
880 | xfs_dqid_t firstid, | 868 | xfs_dqid_t firstid, |
881 | xfs_fsblock_t bno, | 869 | xfs_fsblock_t bno, |
882 | xfs_filblks_t blkcnt, | 870 | xfs_filblks_t blkcnt, |
883 | uint flags) | 871 | uint flags, |
872 | struct list_head *buffer_list) | ||
884 | { | 873 | { |
885 | xfs_buf_t *bp; | 874 | struct xfs_buf *bp; |
886 | int error; | 875 | int error; |
887 | int type; | 876 | int type; |
888 | 877 | ||
889 | ASSERT(blkcnt > 0); | 878 | ASSERT(blkcnt > 0); |
890 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 879 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
@@ -908,7 +897,7 @@ xfs_qm_dqiter_bufs( | |||
908 | break; | 897 | break; |
909 | 898 | ||
910 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 899 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
911 | xfs_buf_delwri_queue(bp); | 900 | xfs_buf_delwri_queue(bp, buffer_list); |
912 | xfs_buf_relse(bp); | 901 | xfs_buf_relse(bp); |
913 | /* | 902 | /* |
914 | * go to the next block. | 903 | * go to the next block. |
@@ -916,6 +905,7 @@ xfs_qm_dqiter_bufs( | |||
916 | bno++; | 905 | bno++; |
917 | firstid += mp->m_quotainfo->qi_dqperchunk; | 906 | firstid += mp->m_quotainfo->qi_dqperchunk; |
918 | } | 907 | } |
908 | |||
919 | return error; | 909 | return error; |
920 | } | 910 | } |
921 | 911 | ||
@@ -925,11 +915,12 @@ xfs_qm_dqiter_bufs( | |||
925 | */ | 915 | */ |
926 | STATIC int | 916 | STATIC int |
927 | xfs_qm_dqiterate( | 917 | xfs_qm_dqiterate( |
928 | xfs_mount_t *mp, | 918 | struct xfs_mount *mp, |
929 | xfs_inode_t *qip, | 919 | struct xfs_inode *qip, |
930 | uint flags) | 920 | uint flags, |
921 | struct list_head *buffer_list) | ||
931 | { | 922 | { |
932 | xfs_bmbt_irec_t *map; | 923 | struct xfs_bmbt_irec *map; |
933 | int i, nmaps; /* number of map entries */ | 924 | int i, nmaps; /* number of map entries */ |
934 | int error; /* return value */ | 925 | int error; /* return value */ |
935 | xfs_fileoff_t lblkno; | 926 | xfs_fileoff_t lblkno; |
@@ -996,21 +987,17 @@ xfs_qm_dqiterate( | |||
996 | * Iterate thru all the blks in the extent and | 987 | * Iterate thru all the blks in the extent and |
997 | * reset the counters of all the dquots inside them. | 988 | * reset the counters of all the dquots inside them. |
998 | */ | 989 | */ |
999 | if ((error = xfs_qm_dqiter_bufs(mp, | 990 | error = xfs_qm_dqiter_bufs(mp, firstid, |
1000 | firstid, | 991 | map[i].br_startblock, |
1001 | map[i].br_startblock, | 992 | map[i].br_blockcount, |
1002 | map[i].br_blockcount, | 993 | flags, buffer_list); |
1003 | flags))) { | 994 | if (error) |
1004 | break; | 995 | goto out; |
1005 | } | ||
1006 | } | 996 | } |
1007 | |||
1008 | if (error) | ||
1009 | break; | ||
1010 | } while (nmaps > 0); | 997 | } while (nmaps > 0); |
1011 | 998 | ||
999 | out: | ||
1012 | kmem_free(map); | 1000 | kmem_free(map); |
1013 | |||
1014 | return error; | 1001 | return error; |
1015 | } | 1002 | } |
1016 | 1003 | ||
@@ -1203,8 +1190,10 @@ error0: | |||
1203 | 1190 | ||
1204 | STATIC int | 1191 | STATIC int |
1205 | xfs_qm_flush_one( | 1192 | xfs_qm_flush_one( |
1206 | struct xfs_dquot *dqp) | 1193 | struct xfs_dquot *dqp, |
1194 | void *data) | ||
1207 | { | 1195 | { |
1196 | struct list_head *buffer_list = data; | ||
1208 | struct xfs_buf *bp = NULL; | 1197 | struct xfs_buf *bp = NULL; |
1209 | int error = 0; | 1198 | int error = 0; |
1210 | 1199 | ||
@@ -1214,14 +1203,12 @@ xfs_qm_flush_one( | |||
1214 | if (!XFS_DQ_IS_DIRTY(dqp)) | 1203 | if (!XFS_DQ_IS_DIRTY(dqp)) |
1215 | goto out_unlock; | 1204 | goto out_unlock; |
1216 | 1205 | ||
1217 | if (!xfs_dqflock_nowait(dqp)) | 1206 | xfs_dqflock(dqp); |
1218 | xfs_dqflock_pushbuf_wait(dqp); | ||
1219 | |||
1220 | error = xfs_qm_dqflush(dqp, &bp); | 1207 | error = xfs_qm_dqflush(dqp, &bp); |
1221 | if (error) | 1208 | if (error) |
1222 | goto out_unlock; | 1209 | goto out_unlock; |
1223 | 1210 | ||
1224 | xfs_buf_delwri_queue(bp); | 1211 | xfs_buf_delwri_queue(bp, buffer_list); |
1225 | xfs_buf_relse(bp); | 1212 | xfs_buf_relse(bp); |
1226 | out_unlock: | 1213 | out_unlock: |
1227 | xfs_dqunlock(dqp); | 1214 | xfs_dqunlock(dqp); |
@@ -1241,6 +1228,7 @@ xfs_qm_quotacheck( | |||
1241 | size_t structsz; | 1228 | size_t structsz; |
1242 | xfs_inode_t *uip, *gip; | 1229 | xfs_inode_t *uip, *gip; |
1243 | uint flags; | 1230 | uint flags; |
1231 | LIST_HEAD (buffer_list); | ||
1244 | 1232 | ||
1245 | count = INT_MAX; | 1233 | count = INT_MAX; |
1246 | structsz = 1; | 1234 | structsz = 1; |
@@ -1259,7 +1247,8 @@ xfs_qm_quotacheck( | |||
1259 | */ | 1247 | */ |
1260 | uip = mp->m_quotainfo->qi_uquotaip; | 1248 | uip = mp->m_quotainfo->qi_uquotaip; |
1261 | if (uip) { | 1249 | if (uip) { |
1262 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); | 1250 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
1251 | &buffer_list); | ||
1263 | if (error) | 1252 | if (error) |
1264 | goto error_return; | 1253 | goto error_return; |
1265 | flags |= XFS_UQUOTA_CHKD; | 1254 | flags |= XFS_UQUOTA_CHKD; |
@@ -1268,7 +1257,8 @@ xfs_qm_quotacheck( | |||
1268 | gip = mp->m_quotainfo->qi_gquotaip; | 1257 | gip = mp->m_quotainfo->qi_gquotaip; |
1269 | if (gip) { | 1258 | if (gip) { |
1270 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1259 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
1271 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | 1260 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, |
1261 | &buffer_list); | ||
1272 | if (error) | 1262 | if (error) |
1273 | goto error_return; | 1263 | goto error_return; |
1274 | flags |= XFS_OQUOTA_CHKD; | 1264 | flags |= XFS_OQUOTA_CHKD; |
@@ -1291,19 +1281,27 @@ xfs_qm_quotacheck( | |||
1291 | * We've made all the changes that we need to make incore. Flush them | 1281 | * We've made all the changes that we need to make incore. Flush them |
1292 | * down to disk buffers if everything was updated successfully. | 1282 | * down to disk buffers if everything was updated successfully. |
1293 | */ | 1283 | */ |
1294 | if (XFS_IS_UQUOTA_ON(mp)) | 1284 | if (XFS_IS_UQUOTA_ON(mp)) { |
1295 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one); | 1285 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, |
1286 | &buffer_list); | ||
1287 | } | ||
1296 | if (XFS_IS_GQUOTA_ON(mp)) { | 1288 | if (XFS_IS_GQUOTA_ON(mp)) { |
1297 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one); | 1289 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, |
1290 | &buffer_list); | ||
1298 | if (!error) | 1291 | if (!error) |
1299 | error = error2; | 1292 | error = error2; |
1300 | } | 1293 | } |
1301 | if (XFS_IS_PQUOTA_ON(mp)) { | 1294 | if (XFS_IS_PQUOTA_ON(mp)) { |
1302 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one); | 1295 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, |
1296 | &buffer_list); | ||
1303 | if (!error) | 1297 | if (!error) |
1304 | error = error2; | 1298 | error = error2; |
1305 | } | 1299 | } |
1306 | 1300 | ||
1301 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
1302 | if (!error) | ||
1303 | error = error2; | ||
1304 | |||
1307 | /* | 1305 | /* |
1308 | * We can get this error if we couldn't do a dquot allocation inside | 1306 | * We can get this error if we couldn't do a dquot allocation inside |
1309 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1307 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
@@ -1317,15 +1315,6 @@ xfs_qm_quotacheck( | |||
1317 | } | 1315 | } |
1318 | 1316 | ||
1319 | /* | 1317 | /* |
1320 | * We didn't log anything, because if we crashed, we'll have to | ||
1321 | * start the quotacheck from scratch anyway. However, we must make | ||
1322 | * sure that our dquot changes are secure before we put the | ||
1323 | * quotacheck'd stamp on the superblock. So, here we do a synchronous | ||
1324 | * flush. | ||
1325 | */ | ||
1326 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1327 | |||
1328 | /* | ||
1329 | * If one type of quotas is off, then it will lose its | 1318 | * If one type of quotas is off, then it will lose its |
1330 | * quotachecked status, since we won't be doing accounting for | 1319 | * quotachecked status, since we won't be doing accounting for |
1331 | * that type anymore. | 1320 | * that type anymore. |
@@ -1334,6 +1323,13 @@ xfs_qm_quotacheck( | |||
1334 | mp->m_qflags |= flags; | 1323 | mp->m_qflags |= flags; |
1335 | 1324 | ||
1336 | error_return: | 1325 | error_return: |
1326 | while (!list_empty(&buffer_list)) { | ||
1327 | struct xfs_buf *bp = | ||
1328 | list_first_entry(&buffer_list, struct xfs_buf, b_list); | ||
1329 | list_del_init(&bp->b_list); | ||
1330 | xfs_buf_relse(bp); | ||
1331 | } | ||
1332 | |||
1337 | if (error) { | 1333 | if (error) { |
1338 | xfs_warn(mp, | 1334 | xfs_warn(mp, |
1339 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", | 1335 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
@@ -1450,6 +1446,7 @@ xfs_qm_dqfree_one( | |||
1450 | STATIC void | 1446 | STATIC void |
1451 | xfs_qm_dqreclaim_one( | 1447 | xfs_qm_dqreclaim_one( |
1452 | struct xfs_dquot *dqp, | 1448 | struct xfs_dquot *dqp, |
1449 | struct list_head *buffer_list, | ||
1453 | struct list_head *dispose_list) | 1450 | struct list_head *dispose_list) |
1454 | { | 1451 | { |
1455 | struct xfs_mount *mp = dqp->q_mount; | 1452 | struct xfs_mount *mp = dqp->q_mount; |
@@ -1482,21 +1479,11 @@ xfs_qm_dqreclaim_one( | |||
1482 | if (!xfs_dqflock_nowait(dqp)) | 1479 | if (!xfs_dqflock_nowait(dqp)) |
1483 | goto out_busy; | 1480 | goto out_busy; |
1484 | 1481 | ||
1485 | /* | ||
1486 | * We have the flush lock so we know that this is not in the | ||
1487 | * process of being flushed. So, if this is dirty, flush it | ||
1488 | * DELWRI so that we don't get a freelist infested with | ||
1489 | * dirty dquots. | ||
1490 | */ | ||
1491 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1482 | if (XFS_DQ_IS_DIRTY(dqp)) { |
1492 | struct xfs_buf *bp = NULL; | 1483 | struct xfs_buf *bp = NULL; |
1493 | 1484 | ||
1494 | trace_xfs_dqreclaim_dirty(dqp); | 1485 | trace_xfs_dqreclaim_dirty(dqp); |
1495 | 1486 | ||
1496 | /* | ||
1497 | * We flush it delayed write, so don't bother releasing the | ||
1498 | * freelist lock. | ||
1499 | */ | ||
1500 | error = xfs_qm_dqflush(dqp, &bp); | 1487 | error = xfs_qm_dqflush(dqp, &bp); |
1501 | if (error) { | 1488 | if (error) { |
1502 | xfs_warn(mp, "%s: dquot %p flush failed", | 1489 | xfs_warn(mp, "%s: dquot %p flush failed", |
@@ -1504,7 +1491,7 @@ xfs_qm_dqreclaim_one( | |||
1504 | goto out_busy; | 1491 | goto out_busy; |
1505 | } | 1492 | } |
1506 | 1493 | ||
1507 | xfs_buf_delwri_queue(bp); | 1494 | xfs_buf_delwri_queue(bp, buffer_list); |
1508 | xfs_buf_relse(bp); | 1495 | xfs_buf_relse(bp); |
1509 | /* | 1496 | /* |
1510 | * Give the dquot another try on the freelist, as the | 1497 | * Give the dquot another try on the freelist, as the |
@@ -1549,8 +1536,10 @@ xfs_qm_shake( | |||
1549 | struct xfs_quotainfo *qi = | 1536 | struct xfs_quotainfo *qi = |
1550 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | 1537 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); |
1551 | int nr_to_scan = sc->nr_to_scan; | 1538 | int nr_to_scan = sc->nr_to_scan; |
1539 | LIST_HEAD (buffer_list); | ||
1552 | LIST_HEAD (dispose_list); | 1540 | LIST_HEAD (dispose_list); |
1553 | struct xfs_dquot *dqp; | 1541 | struct xfs_dquot *dqp; |
1542 | int error; | ||
1554 | 1543 | ||
1555 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | 1544 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) |
1556 | return 0; | 1545 | return 0; |
@@ -1563,15 +1552,20 @@ xfs_qm_shake( | |||
1563 | break; | 1552 | break; |
1564 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | 1553 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, |
1565 | q_lru); | 1554 | q_lru); |
1566 | xfs_qm_dqreclaim_one(dqp, &dispose_list); | 1555 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); |
1567 | } | 1556 | } |
1568 | mutex_unlock(&qi->qi_lru_lock); | 1557 | mutex_unlock(&qi->qi_lru_lock); |
1569 | 1558 | ||
1559 | error = xfs_buf_delwri_submit(&buffer_list); | ||
1560 | if (error) | ||
1561 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
1562 | |||
1570 | while (!list_empty(&dispose_list)) { | 1563 | while (!list_empty(&dispose_list)) { |
1571 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | 1564 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); |
1572 | list_del_init(&dqp->q_lru); | 1565 | list_del_init(&dqp->q_lru); |
1573 | xfs_qm_dqfree_one(dqp); | 1566 | xfs_qm_dqfree_one(dqp); |
1574 | } | 1567 | } |
1568 | |||
1575 | out: | 1569 | out: |
1576 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | 1570 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; |
1577 | } | 1571 | } |
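
The xfs_qm_dquot_walk() change is a conventional callback-context refactor: the walker takes an opaque void *data and hands it to every execute() call, which is how xfs_qm_flush_one() receives its buffer list while xfs_qm_dqpurge() simply passes NULL. The pattern in isolation, as a small standalone C program where the context is just a counter:

    #include <stdio.h>

    struct dquot { int id; int dirty; };

    static int walk(struct dquot *dqs, int n,
                    int (*execute)(struct dquot *, void *), void *data)
    {
        int i, error;

        for (i = 0; i < n; i++) {
            error = execute(&dqs[i], data);
            if (error)
                return error;
        }
        return 0;
    }

    /* A flush_one-style callback: the void pointer carries its context. */
    static int flush_one(struct dquot *dqp, void *data)
    {
        int *flushed = data;

        if (dqp->dirty) {
            dqp->dirty = 0;
            (*flushed)++;
        }
        return 0;
    }

    int main(void)
    {
        struct dquot dqs[] = { { 1, 1 }, { 2, 0 }, { 3, 1 } };
        int flushed = 0;

        walk(dqs, 3, flush_one, &flushed);
        printf("flushed %d dquots\n", flushed);  /* prints 2 */
        return 0;
    }
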
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 28d1f508b578..fa07b7731cf2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -981,15 +981,7 @@ xfs_fs_put_super( | |||
981 | { | 981 | { |
982 | struct xfs_mount *mp = XFS_M(sb); | 982 | struct xfs_mount *mp = XFS_M(sb); |
983 | 983 | ||
984 | /* | ||
985 | * Blow away any referenced inode in the filestreams cache. | ||
986 | * This can and will cause log traffic as inodes go inactive | ||
987 | * here. | ||
988 | */ | ||
989 | xfs_filestream_unmount(mp); | 984 | xfs_filestream_unmount(mp); |
990 | |||
991 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
992 | |||
993 | xfs_unmountfs(mp); | 985 | xfs_unmountfs(mp); |
994 | xfs_syncd_stop(mp); | 986 | xfs_syncd_stop(mp); |
995 | xfs_freesb(mp); | 987 | xfs_freesb(mp); |
@@ -1404,15 +1396,7 @@ out_destroy_workqueues: | |||
1404 | return -error; | 1396 | return -error; |
1405 | 1397 | ||
1406 | out_unmount: | 1398 | out_unmount: |
1407 | /* | ||
1408 | * Blow away any referenced inode in the filestreams cache. | ||
1409 | * This can and will cause log traffic as inodes go inactive | ||
1410 | * here. | ||
1411 | */ | ||
1412 | xfs_filestream_unmount(mp); | 1399 | xfs_filestream_unmount(mp); |
1413 | |||
1414 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1415 | |||
1416 | xfs_unmountfs(mp); | 1400 | xfs_unmountfs(mp); |
1417 | xfs_syncd_stop(mp); | 1401 | xfs_syncd_stop(mp); |
1418 | goto out_free_sb; | 1402 | goto out_free_sb; |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 468c3c0a4f9f..cdb644fd0bd1 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -313,17 +313,10 @@ xfs_quiesce_data( | |||
313 | /* write superblock and hoover up shutdown errors */ | 313 | /* write superblock and hoover up shutdown errors */ |
314 | error = xfs_sync_fsdata(mp); | 314 | error = xfs_sync_fsdata(mp); |
315 | 315 | ||
316 | /* make sure all delwri buffers are written out */ | ||
317 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
318 | |||
319 | /* mark the log as covered if needed */ | 316 | /* mark the log as covered if needed */ |
320 | if (xfs_log_need_covered(mp)) | 317 | if (xfs_log_need_covered(mp)) |
321 | error2 = xfs_fs_log_dummy(mp); | 318 | error2 = xfs_fs_log_dummy(mp); |
322 | 319 | ||
323 | /* flush data-only devices */ | ||
324 | if (mp->m_rtdev_targp) | ||
325 | xfs_flush_buftarg(mp->m_rtdev_targp, 1); | ||
326 | |||
327 | return error ? error : error2; | 320 | return error ? error : error2; |
328 | } | 321 | } |
329 | 322 | ||
@@ -684,17 +677,6 @@ restart: | |||
684 | if (!xfs_iflock_nowait(ip)) { | 677 | if (!xfs_iflock_nowait(ip)) { |
685 | if (!(sync_mode & SYNC_WAIT)) | 678 | if (!(sync_mode & SYNC_WAIT)) |
686 | goto out; | 679 | goto out; |
687 | |||
688 | /* | ||
689 | * If we only have a single dirty inode in a cluster there is | ||
690 | * a fair chance that the AIL push may have pushed it into | ||
691 | * the buffer, but xfsbufd won't touch it until 30 seconds | ||
692 | * from now, and thus we will lock up here. | ||
693 | * | ||
694 | * Promote the inode buffer to the front of the delwri list | ||
695 | * and wake up xfsbufd now. | ||
696 | */ | ||
697 | xfs_promote_inode(ip); | ||
698 | xfs_iflock(ip); | 680 | xfs_iflock(ip); |
699 | } | 681 | } |
700 | 682 | ||
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 06838c42b2a0..2e41756e263a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -328,7 +328,7 @@ DEFINE_BUF_EVENT(xfs_buf_unlock); | |||
328 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queued); |
332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
@@ -486,12 +486,10 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); | |||
486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); | 486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); |
487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); | 487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); |
488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); | 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); |
489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); | ||
490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); | 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); |
491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); | 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); |
492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); | 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); |
493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); | 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); |
494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); | ||
495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); | 493 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); |
496 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); | 494 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); |
497 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); | 495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); |
@@ -881,10 +879,9 @@ DEFINE_EVENT(xfs_log_item_class, name, \ | |||
881 | TP_PROTO(struct xfs_log_item *lip), \ | 879 | TP_PROTO(struct xfs_log_item *lip), \ |
882 | TP_ARGS(lip)) | 880 | TP_ARGS(lip)) |
883 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); | 881 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); |
884 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf); | ||
885 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned); | ||
886 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); | 882 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); |
887 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); | 883 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); |
884 | DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); | ||
888 | 885 | ||
889 | 886 | ||
890 | DECLARE_EVENT_CLASS(xfs_file_class, | 887 | DECLARE_EVENT_CLASS(xfs_file_class, |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f6118703f20d..7ab99e1898c8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -345,11 +345,9 @@ struct xfs_item_ops { | |||
345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); | 345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); |
346 | void (*iop_pin)(xfs_log_item_t *); | 346 | void (*iop_pin)(xfs_log_item_t *); |
347 | void (*iop_unpin)(xfs_log_item_t *, int remove); | 347 | void (*iop_unpin)(xfs_log_item_t *, int remove); |
348 | uint (*iop_trylock)(xfs_log_item_t *); | 348 | uint (*iop_push)(struct xfs_log_item *, struct list_head *); |
349 | void (*iop_unlock)(xfs_log_item_t *); | 349 | void (*iop_unlock)(xfs_log_item_t *); |
350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); | 350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); |
351 | void (*iop_push)(xfs_log_item_t *); | ||
352 | bool (*iop_pushbuf)(xfs_log_item_t *); | ||
353 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); | 351 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); |
354 | }; | 352 | }; |
355 | 353 | ||
@@ -357,20 +355,18 @@ struct xfs_item_ops { | |||
357 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) | 355 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) |
358 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) | 356 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) |
359 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) | 357 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) |
360 | #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) | 358 | #define IOP_PUSH(ip, list) (*(ip)->li_ops->iop_push)(ip, list) |
361 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) | 359 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) |
362 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) | 360 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) |
363 | #define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) | ||
364 | #define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) | ||
365 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) | 361 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) |
366 | 362 | ||
367 | /* | 363 | /* |
368 | * Return values for the IOP_TRYLOCK() routines. | 364 | * Return values for the IOP_PUSH() routines. |
369 | */ | 365 | */ |
370 | #define XFS_ITEM_SUCCESS 0 | 366 | #define XFS_ITEM_SUCCESS 0 |
371 | #define XFS_ITEM_PINNED 1 | 367 | #define XFS_ITEM_PINNED 1 |
372 | #define XFS_ITEM_LOCKED 2 | 368 | #define XFS_ITEM_LOCKED 2 |
373 | #define XFS_ITEM_PUSHBUF 3 | 369 | #define XFS_ITEM_FLUSHING 3 |
374 | 370 | ||
375 | /* | 371 | /* |
376 | * This is the type of function which can be given to xfs_trans_callback() | 372 | * This is the type of function which can be given to xfs_trans_callback() |
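
With iop_trylock and iop_pushbuf gone, an item type's ops table shrinks to a single push entry point. A sketch of the wiring for a hypothetical item type (the handler names are placeholders; compare the xfs_inode_item_ops hunk earlier in this diff):

    static const struct xfs_item_ops xfs_myitem_ops = {
        .iop_format     = xfs_myitem_format,
        .iop_pin        = xfs_myitem_pin,
        .iop_unpin      = xfs_myitem_unpin,
        .iop_unlock     = xfs_myitem_unlock,
        .iop_committed  = xfs_myitem_committed,
        .iop_push       = xfs_myitem_push,      /* returns XFS_ITEM_* */
        .iop_committing = xfs_myitem_committing,
    };

The AIL invokes it through IOP_PUSH(lip, &ailp->xa_buf_list) and interprets the four return codes as: keep going (SUCCESS), force the log (PINNED), skip and retry (LOCKED), or back off while I/O completes (FLUSHING).
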
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0425ca16738b..49d9cde33bb3 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -364,29 +364,31 @@ xfsaild_push( | |||
364 | xfs_log_item_t *lip; | 364 | xfs_log_item_t *lip; |
365 | xfs_lsn_t lsn; | 365 | xfs_lsn_t lsn; |
366 | xfs_lsn_t target; | 366 | xfs_lsn_t target; |
367 | long tout = 10; | 367 | long tout; |
368 | int stuck = 0; | 368 | int stuck = 0; |
369 | int flushing = 0; | ||
369 | int count = 0; | 370 | int count = 0; |
370 | int push_xfsbufd = 0; | ||
371 | 371 | ||
372 | /* | 372 | /* |
373 | * If last time we ran we encountered pinned items, force the log first | 373 | * If we encountered pinned items or did not finish writing out all |
374 | * and wait for it before pushing again. | 374 | * buffers the last time we ran, force the log first and wait for it |
375 | * before pushing again. | ||
375 | */ | 376 | */ |
376 | spin_lock(&ailp->xa_lock); | 377 | if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 && |
377 | if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush && | 378 | (!list_empty_careful(&ailp->xa_buf_list) || |
378 | !list_empty(&ailp->xa_ail)) { | 379 | xfs_ail_min_lsn(ailp))) { |
379 | ailp->xa_log_flush = 0; | 380 | ailp->xa_log_flush = 0; |
380 | spin_unlock(&ailp->xa_lock); | 381 | |
381 | XFS_STATS_INC(xs_push_ail_flush); | 382 | XFS_STATS_INC(xs_push_ail_flush); |
382 | xfs_log_force(mp, XFS_LOG_SYNC); | 383 | xfs_log_force(mp, XFS_LOG_SYNC); |
383 | spin_lock(&ailp->xa_lock); | ||
384 | } | 384 | } |
385 | 385 | ||
386 | spin_lock(&ailp->xa_lock); | ||
386 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); | 387 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); |
387 | if (!lip) { | 388 | if (!lip) { |
388 | /* | 389 | /* |
389 | * AIL is empty or our push has reached the end. | 390 | * If the AIL is empty or our push has reached the end we are |
391 | * done now. | ||
390 | */ | 392 | */ |
391 | xfs_trans_ail_cursor_done(ailp, &cur); | 393 | xfs_trans_ail_cursor_done(ailp, &cur); |
392 | spin_unlock(&ailp->xa_lock); | 394 | spin_unlock(&ailp->xa_lock); |
@@ -395,55 +397,42 @@ xfsaild_push( | |||
395 | 397 | ||
396 | XFS_STATS_INC(xs_push_ail); | 398 | XFS_STATS_INC(xs_push_ail); |
397 | 399 | ||
398 | /* | ||
399 | * While the item we are looking at is below the given threshold | ||
400 | * try to flush it out. We'd like not to stop until we've at least | ||
401 | * tried to push on everything in the AIL with an LSN less than | ||
402 | * the given threshold. | ||
403 | * | ||
404 | * However, we will stop after a certain number of pushes and wait | ||
405 | * for a reduced timeout to fire before pushing further. This | ||
406 | * prevents us from spinning when we can't do anything or there is | ||
407 | * lots of contention on the AIL lists. | ||
408 | */ | ||
409 | lsn = lip->li_lsn; | 400 | lsn = lip->li_lsn; |
410 | target = ailp->xa_target; | 401 | target = ailp->xa_target; |
411 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { | 402 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { |
412 | int lock_result; | 403 | int lock_result; |
404 | |||
413 | /* | 405 | /* |
414 | * If we can lock the item without sleeping, unlock the AIL | 406 | * Note that IOP_PUSH may unlock and reacquire the AIL lock. We |
415 | * lock and flush the item. Then re-grab the AIL lock so we | 407 | * rely on the AIL cursor implementation to be able to deal with |
416 | * can look for the next item on the AIL. List changes are | 408 | * the dropped lock. |
417 | * handled by the AIL lookup functions internally | ||
418 | * | ||
419 | * If we can't lock the item, either its holder will flush it | ||
420 | * or it is already being flushed or it is being relogged. In | ||
421 | * any of these case it is being taken care of and we can just | ||
422 | * skip to the next item in the list. | ||
423 | */ | 409 | */ |
424 | lock_result = IOP_TRYLOCK(lip); | 410 | lock_result = IOP_PUSH(lip, &ailp->xa_buf_list); |
425 | spin_unlock(&ailp->xa_lock); | ||
426 | switch (lock_result) { | 411 | switch (lock_result) { |
427 | case XFS_ITEM_SUCCESS: | 412 | case XFS_ITEM_SUCCESS: |
428 | XFS_STATS_INC(xs_push_ail_success); | 413 | XFS_STATS_INC(xs_push_ail_success); |
429 | trace_xfs_ail_push(lip); | 414 | trace_xfs_ail_push(lip); |
430 | 415 | ||
431 | IOP_PUSH(lip); | ||
432 | ailp->xa_last_pushed_lsn = lsn; | 416 | ailp->xa_last_pushed_lsn = lsn; |
433 | break; | 417 | break; |
434 | 418 | ||
435 | case XFS_ITEM_PUSHBUF: | 419 | case XFS_ITEM_FLUSHING: |
436 | XFS_STATS_INC(xs_push_ail_pushbuf); | 420 | /* |
437 | trace_xfs_ail_pushbuf(lip); | 421 | * The item or its backing buffer is already being |
438 | 422 | * flushed. The typical reason for that is that an | |
439 | if (!IOP_PUSHBUF(lip)) { | 423 | * inode buffer is locked because we already pushed the |
440 | trace_xfs_ail_pushbuf_pinned(lip); | 424 | * updates to it as part of inode clustering. |
441 | stuck++; | 425 | * |
442 | ailp->xa_log_flush++; | 426 | * We do not want to stop flushing just because lots |
443 | } else { | 427 | * of items are already being flushed, but we need to |
444 | ailp->xa_last_pushed_lsn = lsn; | 428 | * re-try the flushing relatively soon if most of the |
445 | } | 429 | * AIL is being flushed. |
446 | push_xfsbufd = 1; | 430 | */ |
431 | XFS_STATS_INC(xs_push_ail_flushing); | ||
432 | trace_xfs_ail_flushing(lip); | ||
433 | |||
434 | flushing++; | ||
435 | ailp->xa_last_pushed_lsn = lsn; | ||
447 | break; | 436 | break; |
448 | 437 | ||
449 | case XFS_ITEM_PINNED: | 438 | case XFS_ITEM_PINNED: |
@@ -453,23 +442,22 @@ xfsaild_push( | |||
453 | stuck++; | 442 | stuck++; |
454 | ailp->xa_log_flush++; | 443 | ailp->xa_log_flush++; |
455 | break; | 444 | break; |
456 | |||
457 | case XFS_ITEM_LOCKED: | 445 | case XFS_ITEM_LOCKED: |
458 | XFS_STATS_INC(xs_push_ail_locked); | 446 | XFS_STATS_INC(xs_push_ail_locked); |
459 | trace_xfs_ail_locked(lip); | 447 | trace_xfs_ail_locked(lip); |
448 | |||
460 | stuck++; | 449 | stuck++; |
461 | break; | 450 | break; |
462 | |||
463 | default: | 451 | default: |
464 | ASSERT(0); | 452 | ASSERT(0); |
465 | break; | 453 | break; |
466 | } | 454 | } |
467 | 455 | ||
468 | spin_lock(&ailp->xa_lock); | ||
469 | count++; | 456 | count++; |
470 | 457 | ||
471 | /* | 458 | /* |
472 | * Are there too many items we can't do anything with? | 459 | * Are there too many items we can't do anything with? |
460 | * | ||
473 | * If we are skipping too many items because we can't flush | 461 | * If we are skipping too many items because we can't flush |
474 | * them or they are already being flushed, we back off and | 462 | * them or they are already being flushed, we back off and |
475 | * give them time to complete whatever operation is being | 463 | * give them time to complete whatever operation is being |
@@ -491,42 +479,36 @@ xfsaild_push( | |||
491 | xfs_trans_ail_cursor_done(ailp, &cur); | 479 | xfs_trans_ail_cursor_done(ailp, &cur); |
492 | spin_unlock(&ailp->xa_lock); | 480 | spin_unlock(&ailp->xa_lock); |
493 | 481 | ||
494 | if (push_xfsbufd) { | 482 | if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) |
495 | /* we've got delayed write buffers to flush */ | 483 | ailp->xa_log_flush++; |
496 | wake_up_process(mp->m_ddev_targp->bt_task); | ||
497 | } | ||
498 | 484 | ||
499 | /* assume we have more work to do in a short while */ | 485 | if (!count || XFS_LSN_CMP(lsn, target) >= 0) { |
500 | out_done: | 486 | out_done: |
501 | if (!count) { | ||
502 | /* We're past our target or empty, so idle */ | ||
503 | ailp->xa_last_pushed_lsn = 0; | ||
504 | ailp->xa_log_flush = 0; | ||
505 | |||
506 | tout = 50; | ||
507 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | ||
508 | /* | 487 | /* |
509 | * We reached the target so wait a bit longer for I/O to | 488 | * We reached the target or the AIL is empty, so wait a bit |
510 | * complete and remove pushed items from the AIL before we | 489 | * longer for I/O to complete and remove pushed items from the |
511 | * start the next scan from the start of the AIL. | 490 | * AIL before we start the next scan from the start of the AIL. |
512 | */ | 491 | */ |
513 | tout = 50; | 492 | tout = 50; |
514 | ailp->xa_last_pushed_lsn = 0; | 493 | ailp->xa_last_pushed_lsn = 0; |
515 | } else if ((stuck * 100) / count > 90) { | 494 | } else if (((stuck + flushing) * 100) / count > 90) { |
516 | /* | 495 | /* |
517 | * Either there is a lot of contention on the AIL or we | 496 | * Either there is a lot of contention on the AIL or we are |
518 | * are stuck due to operations in progress. "Stuck" in this | 497 | * stuck due to operations in progress. "Stuck" in this case |
519 | * case is defined as >90% of the items we tried to push | 498 | * is defined as >90% of the items we tried to push were stuck. |
520 | * were stuck. | ||
521 | * | 499 | * |
522 | * Backoff a bit more to allow some I/O to complete before | 500 | * Backoff a bit more to allow some I/O to complete before |
523 | * restarting from the start of the AIL. This prevents us | 501 | * restarting from the start of the AIL. This prevents us from |
524 | * from spinning on the same items, and if they are pinned will | 502 | * spinning on the same items, and if they are pinned will allow |
525 | * allow the restart to issue a log force to unpin the stuck | 503 | * the restart to issue a log force to unpin the stuck items. |
526 | * items. | ||
527 | */ | 504 | */ |
528 | tout = 20; | 505 | tout = 20; |
529 | ailp->xa_last_pushed_lsn = 0; | 506 | ailp->xa_last_pushed_lsn = 0; |
507 | } else { | ||
508 | /* | ||
509 | * Assume we have more work to do in a short while. | ||
510 | */ | ||
511 | tout = 10; | ||
530 | } | 512 | } |
531 | 513 | ||
532 | return tout; | 514 | return tout; |
@@ -539,6 +521,8 @@ xfsaild( | |||
539 | struct xfs_ail *ailp = data; | 521 | struct xfs_ail *ailp = data; |
540 | long tout = 0; /* milliseconds */ | 522 | long tout = 0; /* milliseconds */ |
541 | 523 | ||
524 | current->flags |= PF_MEMALLOC; | ||
525 | |||
542 | while (!kthread_should_stop()) { | 526 | while (!kthread_should_stop()) { |
543 | if (tout && tout <= 20) | 527 | if (tout && tout <= 20) |
544 | __set_current_state(TASK_KILLABLE); | 528 | __set_current_state(TASK_KILLABLE); |
@@ -794,6 +778,7 @@ xfs_trans_ail_init( | |||
794 | INIT_LIST_HEAD(&ailp->xa_ail); | 778 | INIT_LIST_HEAD(&ailp->xa_ail); |
795 | INIT_LIST_HEAD(&ailp->xa_cursors); | 779 | INIT_LIST_HEAD(&ailp->xa_cursors); |
796 | spin_lock_init(&ailp->xa_lock); | 780 | spin_lock_init(&ailp->xa_lock); |
781 | INIT_LIST_HEAD(&ailp->xa_buf_list); | ||
797 | init_waitqueue_head(&ailp->xa_empty); | 782 | init_waitqueue_head(&ailp->xa_empty); |
798 | 783 | ||
799 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | 784 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", |
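
The tail of xfsaild_push() now selects the sleep interval from three cases rather than four. The decision in isolation, as a runnable model using the thresholds visible in the hunk above:

    #include <stdio.h>

    /* Mirrors the tout selection at the end of xfsaild_push(). */
    static long pick_tout(int count, int stuck, int flushing,
                          int reached_target)
    {
        if (!count || reached_target)
            return 50;  /* idle: let I/O finish, rescan from the start */
        if (((stuck + flushing) * 100) / count > 90)
            return 20;  /* >90% stuck: back off harder */
        return 10;      /* normal case: more work shortly */
    }

    int main(void)
    {
        printf("%ld\n", pick_tout(0, 0, 0, 0));      /* 50 */
        printf("%ld\n", pick_tout(100, 80, 15, 0));  /* 20 */
        printf("%ld\n", pick_tout(100, 10, 5, 0));   /* 10 */
        return 0;
    }
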
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 296a7995a007..9132d162c4b8 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -165,14 +165,6 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
165 | XFS_BUF_DONE(bp); | 165 | XFS_BUF_DONE(bp); |
166 | } | 166 | } |
167 | 167 | ||
168 | /* | ||
169 | * If the buffer is stale then it was binval'ed | ||
170 | * since last read. This doesn't matter since the | ||
171 | * caller isn't allowed to use the data anyway. | ||
172 | */ | ||
173 | else if (XFS_BUF_ISSTALE(bp)) | ||
174 | ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); | ||
175 | |||
176 | ASSERT(bp->b_transp == tp); | 168 | ASSERT(bp->b_transp == tp); |
177 | bip = bp->b_fspriv; | 169 | bip = bp->b_fspriv; |
178 | ASSERT(bip != NULL); | 170 | ASSERT(bip != NULL); |
@@ -418,19 +410,6 @@ xfs_trans_read_buf( | |||
418 | return 0; | 410 | return 0; |
419 | 411 | ||
420 | shutdown_abort: | 412 | shutdown_abort: |
421 | /* | ||
422 | * the theory here is that buffer is good but we're | ||
423 | * bailing out because the filesystem is being forcibly | ||
424 | * shut down. So we should leave the b_flags alone since | ||
425 | * the buffer's not staled and just get out. | ||
426 | */ | ||
427 | #if defined(DEBUG) | ||
428 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | ||
429 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); | ||
430 | #endif | ||
431 | ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) != | ||
432 | (XBF_STALE|XBF_DELWRI)); | ||
433 | |||
434 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | 413 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
435 | xfs_buf_relse(bp); | 414 | xfs_buf_relse(bp); |
436 | *bpp = NULL; | 415 | *bpp = NULL; |
@@ -649,22 +628,33 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
649 | 628 | ||
650 | 629 | ||
651 | /* | 630 | /* |
652 | * This called to invalidate a buffer that is being used within | 631 | * Invalidate a buffer that is being used within a transaction. |
653 | * a transaction. Typically this is because the blocks in the | 632 | * |
654 | * buffer are being freed, so we need to prevent it from being | 633 | * Typically this is because the blocks in the buffer are being freed, so we |
655 | * written out when we're done. Allowing it to be written again | 634 | * need to prevent it from being written out when we're done. Allowing it |
656 | * might overwrite data in the free blocks if they are reallocated | 635 | * to be written again might overwrite data in the free blocks if they are |
657 | * to a file. | 636 | * reallocated to a file. |
658 | * | 637 | * |
659 | * We prevent the buffer from being written out by clearing the | 638 | * We prevent the buffer from being written out by marking it stale. We can't |
660 | * B_DELWRI flag. We can't always | 639 | * get rid of the buf log item at this point because the buffer may still be |
661 | * get rid of the buf log item at this point, though, because | 640 | * pinned by another transaction. If that is the case, then we'll wait until |
662 | * the buffer may still be pinned by another transaction. If that | 641 | * the buffer is committed to disk for the last time (we can tell by the ref |
663 | * is the case, then we'll wait until the buffer is committed to | 642 | * count) and free it in xfs_buf_item_unpin(). Until that happens we will |
664 | * disk for the last time (we can tell by the ref count) and | 643 | * keep the buffer locked so that the buffer and buf log item are not reused. |
665 | * free it in xfs_buf_item_unpin(). Until it is cleaned up we | 644 | * |
666 | * will keep the buffer locked so that the buffer and buf log item | 645 | * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log |
667 | * are not reused. | 646 | * the buf item. This will be used at recovery time to determine that copies |
647 | * of the buffer in the log before this should not be replayed. | ||
648 | * | ||
649 | * We mark the item descriptor and the transaction dirty so that we'll hold | ||
650 | * the buffer until after the commit. | ||
651 | * | ||
652 | * Since we're invalidating the buffer, we also clear the state about which | ||
653 | * parts of the buffer have been logged. We also clear the flag indicating | ||
654 | * that this is an inode buffer since the data in the buffer will no longer | ||
655 | * be valid. | ||
656 | * | ||
657 | * We set the stale bit in the buffer as well since we're getting rid of it. | ||
668 | */ | 658 | */ |
669 | void | 659 | void |
670 | xfs_trans_binval( | 660 | xfs_trans_binval( |
@@ -684,7 +674,6 @@ xfs_trans_binval( | |||
684 | * If the buffer is already invalidated, then | 674 | * If the buffer is already invalidated, then |
685 | * just return. | 675 | * just return. |
686 | */ | 676 | */ |
687 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
688 | ASSERT(XFS_BUF_ISSTALE(bp)); | 677 | ASSERT(XFS_BUF_ISSTALE(bp)); |
689 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 678 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
690 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); | 679 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
@@ -694,27 +683,8 @@ xfs_trans_binval( | |||
694 | return; | 683 | return; |
695 | } | 684 | } |
696 | 685 | ||
697 | /* | ||
698 | * Clear the dirty bit in the buffer and set the STALE flag | ||
699 | * in the buf log item. The STALE flag will be used in | ||
700 | * xfs_buf_item_unpin() to determine if it should clean up | ||
701 | * when the last reference to the buf item is given up. | ||
702 | * We set the XFS_BLF_CANCEL flag in the buf log format structure | ||
703 | * and log the buf item. This will be used at recovery time | ||
704 | * to determine that copies of the buffer in the log before | ||
705 | * this should not be replayed. | ||
706 | * We mark the item descriptor and the transaction dirty so | ||
707 | * that we'll hold the buffer until after the commit. | ||
708 | * | ||
709 | * Since we're invalidating the buffer, we also clear the state | ||
710 | * about which parts of the buffer have been logged. We also | ||
711 | * clear the flag indicating that this is an inode buffer since | ||
712 | * the data in the buffer will no longer be valid. | ||
713 | * | ||
714 | * We set the stale bit in the buffer as well since we're getting | ||
715 | * rid of it. | ||
716 | */ | ||
717 | xfs_buf_stale(bp); | 686 | xfs_buf_stale(bp); |
687 | |||
718 | bip->bli_flags |= XFS_BLI_STALE; | 688 | bip->bli_flags |= XFS_BLI_STALE; |
719 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 689 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
720 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; | 690 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 218304a8cdc7..f72bdd48a5c1 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -71,6 +71,7 @@ struct xfs_ail { | |||
71 | spinlock_t xa_lock; | 71 | spinlock_t xa_lock; |
72 | xfs_lsn_t xa_last_pushed_lsn; | 72 | xfs_lsn_t xa_last_pushed_lsn; |
73 | int xa_log_flush; | 73 | int xa_log_flush; |
74 | struct list_head xa_buf_list; | ||
74 | wait_queue_head_t xa_empty; | 75 | wait_queue_head_t xa_empty; |
75 | }; | 76 | }; |
76 | 77 | ||
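
Putting the xfs_trans_ail.c and xfs_trans_priv.h pieces together, the lifetime of xa_buf_list across one push cycle looks roughly like this (a condensed sketch: next_ail_item stands in for the cursor walk, and locking, back-off, and error handling are elided):

    /* Set up once, when the AIL is initialized. */
    INIT_LIST_HEAD(&ailp->xa_buf_list);

    /* Each push pass: items stage their backing buffers on the list. */
    while ((lip = next_ail_item(ailp)) != NULL)
        IOP_PUSH(lip, &ailp->xa_buf_list);

    /*
     * One asynchronous submission per pass.  Buffers that could not
     * be written stay on xa_buf_list for the next pass, and a failure
     * here asks for a log force before pushing again.
     */
    if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
        ailp->xa_log_flush++;
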