Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h    |   15
-rw-r--r-- | fs/ext4/inode.c   | 1011
-rw-r--r-- | fs/ext4/page-io.c |    4
3 files changed, 487 insertions, 543 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2ebfcde5a156..90a164f365c4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,21 +177,6 @@ struct ext4_map_blocks {
177 | }; | 177 | }; |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * For delayed allocation tracking | ||
181 | */ | ||
182 | struct mpage_da_data { | ||
183 | struct inode *inode; | ||
184 | sector_t b_blocknr; /* start block number of extent */ | ||
185 | size_t b_size; /* size of extent */ | ||
186 | unsigned long b_state; /* state of the extent */ | ||
187 | unsigned long first_page, next_page; /* extent of pages */ | ||
188 | struct writeback_control *wbc; | ||
189 | int io_done; | ||
190 | int pages_written; | ||
191 | int retval; | ||
192 | }; | ||
193 | |||
194 | /* | ||
195 | * Flags for ext4_io_end->flags | 180 | * Flags for ext4_io_end->flags |
196 | */ | 181 | */ |
197 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 182 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b777e51b677..5939a4742def 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1407,149 +1407,42 @@ static void ext4_da_page_release_reservation(struct page *page,
1407 | * Delayed allocation stuff | 1407 | * Delayed allocation stuff |
1408 | */ | 1408 | */ |
1409 | 1409 | ||
1410 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd); | 1410 | struct mpage_da_data { |
1411 | 1411 | struct inode *inode; | |
1412 | /* | 1412 | struct writeback_control *wbc; |
1413 | * mpage_da_submit_io - walks through extent of pages and try to write | 1413 | pgoff_t first_page; /* The first page to write */ |
1414 | * them with writepage() call back | 1414 | pgoff_t next_page; /* Current page to examine */ |
1415 | * | 1415 | pgoff_t last_page; /* Last page to examine */ |
1416 | * @mpd->inode: inode | ||
1417 | * @mpd->first_page: first page of the extent | ||
1418 | * @mpd->next_page: page after the last page of the extent | ||
1419 | * | ||
1420 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
1421 | * to be allocated. this may be wrong if allocation failed. | ||
1422 | * | ||
1423 | * As pages are already locked by write_cache_pages(), we can't use it | ||
1424 | */ | ||
1425 | static int mpage_da_submit_io(struct mpage_da_data *mpd, | ||
1426 | struct ext4_map_blocks *map) | ||
1427 | { | ||
1428 | struct pagevec pvec; | ||
1429 | unsigned long index, end; | ||
1430 | int ret = 0, err, nr_pages, i; | ||
1431 | struct inode *inode = mpd->inode; | ||
1432 | struct address_space *mapping = inode->i_mapping; | ||
1433 | loff_t size = i_size_read(inode); | ||
1434 | unsigned int len, block_start; | ||
1435 | struct buffer_head *bh, *page_bufs = NULL; | ||
1436 | sector_t pblock = 0, cur_logical = 0; | ||
1437 | struct ext4_io_submit io_submit; | ||
1438 | |||
1439 | BUG_ON(mpd->next_page <= mpd->first_page); | ||
1440 | ext4_io_submit_init(&io_submit, mpd->wbc); | ||
1441 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
1442 | if (!io_submit.io_end) { | ||
1443 | ext4_da_block_invalidatepages(mpd); | ||
1444 | return -ENOMEM; | ||
1445 | } | ||
1446 | /* | 1416 | /* |
1447 | * We need to start from the first_page to the next_page - 1 | 1417 | * Extent to map - this can be after first_page because that can be |
1448 | * to make sure we also write the mapped dirty buffer_heads. | 1418 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
1449 | * If we look at mpd->b_blocknr we would only be looking | 1419 | * is delalloc or unwritten. |
1450 | * at the currently mapped buffer_heads. | ||
1451 | */ | 1420 | */ |
1452 | index = mpd->first_page; | 1421 | struct ext4_map_blocks map; |
1453 | end = mpd->next_page - 1; | 1422 | struct ext4_io_submit io_submit; /* IO submission data */ |
1454 | 1423 | }; | |
1455 | pagevec_init(&pvec, 0); | ||
1456 | while (index <= end) { | ||
1457 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1458 | if (nr_pages == 0) | ||
1459 | break; | ||
1460 | for (i = 0; i < nr_pages; i++) { | ||
1461 | int skip_page = 0; | ||
1462 | struct page *page = pvec.pages[i]; | ||
1463 | |||
1464 | index = page->index; | ||
1465 | if (index > end) | ||
1466 | break; | ||
1467 | |||
1468 | if (index == size >> PAGE_CACHE_SHIFT) | ||
1469 | len = size & ~PAGE_CACHE_MASK; | ||
1470 | else | ||
1471 | len = PAGE_CACHE_SIZE; | ||
1472 | if (map) { | ||
1473 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
1474 | inode->i_blkbits); | ||
1475 | pblock = map->m_pblk + (cur_logical - | ||
1476 | map->m_lblk); | ||
1477 | } | ||
1478 | index++; | ||
1479 | |||
1480 | BUG_ON(!PageLocked(page)); | ||
1481 | BUG_ON(PageWriteback(page)); | ||
1482 | |||
1483 | bh = page_bufs = page_buffers(page); | ||
1484 | block_start = 0; | ||
1485 | do { | ||
1486 | if (map && (cur_logical >= map->m_lblk) && | ||
1487 | (cur_logical <= (map->m_lblk + | ||
1488 | (map->m_len - 1)))) { | ||
1489 | if (buffer_delay(bh)) { | ||
1490 | clear_buffer_delay(bh); | ||
1491 | bh->b_blocknr = pblock; | ||
1492 | } | ||
1493 | if (buffer_unwritten(bh) || | ||
1494 | buffer_mapped(bh)) | ||
1495 | BUG_ON(bh->b_blocknr != pblock); | ||
1496 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
1497 | set_buffer_uninit(bh); | ||
1498 | clear_buffer_unwritten(bh); | ||
1499 | } | ||
1500 | |||
1501 | /* | ||
1502 | * skip page if block allocation undone and | ||
1503 | * block is dirty | ||
1504 | */ | ||
1505 | if (ext4_bh_delay_or_unwritten(NULL, bh)) | ||
1506 | skip_page = 1; | ||
1507 | bh = bh->b_this_page; | ||
1508 | block_start += bh->b_size; | ||
1509 | cur_logical++; | ||
1510 | pblock++; | ||
1511 | } while (bh != page_bufs); | ||
1512 | |||
1513 | if (skip_page) { | ||
1514 | unlock_page(page); | ||
1515 | continue; | ||
1516 | } | ||
1517 | |||
1518 | clear_page_dirty_for_io(page); | ||
1519 | err = ext4_bio_write_page(&io_submit, page, len, | ||
1520 | mpd->wbc); | ||
1521 | if (!err) | ||
1522 | mpd->pages_written++; | ||
1523 | /* | ||
1524 | * In error case, we have to continue because | ||
1525 | * remaining pages are still locked | ||
1526 | */ | ||
1527 | if (ret == 0) | ||
1528 | ret = err; | ||
1529 | } | ||
1530 | pagevec_release(&pvec); | ||
1531 | } | ||
1532 | ext4_io_submit(&io_submit); | ||
1533 | /* Drop io_end reference we got from init */ | ||
1534 | ext4_put_io_end_defer(io_submit.io_end); | ||
1535 | return ret; | ||
1536 | } | ||
1537 | 1424 | ||
1538 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 1425 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
1426 | bool invalidate) | ||
1539 | { | 1427 | { |
1540 | int nr_pages, i; | 1428 | int nr_pages, i; |
1541 | pgoff_t index, end; | 1429 | pgoff_t index, end; |
1542 | struct pagevec pvec; | 1430 | struct pagevec pvec; |
1543 | struct inode *inode = mpd->inode; | 1431 | struct inode *inode = mpd->inode; |
1544 | struct address_space *mapping = inode->i_mapping; | 1432 | struct address_space *mapping = inode->i_mapping; |
1545 | ext4_lblk_t start, last; | 1433 | |
1434 | /* This is necessary when next_page == 0. */ | ||
1435 | if (mpd->first_page >= mpd->next_page) | ||
1436 | return; | ||
1546 | 1437 | ||
1547 | index = mpd->first_page; | 1438 | index = mpd->first_page; |
1548 | end = mpd->next_page - 1; | 1439 | end = mpd->next_page - 1; |
1549 | 1440 | if (invalidate) { | |
1550 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1441 | ext4_lblk_t start, last; |
1551 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1442 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1552 | ext4_es_remove_extent(inode, start, last - start + 1); | 1443 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1444 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
1445 | } | ||
1553 | 1446 | ||
1554 | pagevec_init(&pvec, 0); | 1447 | pagevec_init(&pvec, 0); |
1555 | while (index <= end) { | 1448 | while (index <= end) { |
@@ -1562,14 +1455,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
1562 | break; | 1455 | break; |
1563 | BUG_ON(!PageLocked(page)); | 1456 | BUG_ON(!PageLocked(page)); |
1564 | BUG_ON(PageWriteback(page)); | 1457 | BUG_ON(PageWriteback(page)); |
1565 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); | 1458 | if (invalidate) { |
1566 | ClearPageUptodate(page); | 1459 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1460 | ClearPageUptodate(page); | ||
1461 | } | ||
1567 | unlock_page(page); | 1462 | unlock_page(page); |
1568 | } | 1463 | } |
1569 | index = pvec.pages[nr_pages - 1]->index + 1; | 1464 | index = pvec.pages[nr_pages - 1]->index + 1; |
1570 | pagevec_release(&pvec); | 1465 | pagevec_release(&pvec); |
1571 | } | 1466 | } |
1572 | return; | ||
1573 | } | 1467 | } |
1574 | 1468 | ||
1575 | static void ext4_print_free_blocks(struct inode *inode) | 1469 | static void ext4_print_free_blocks(struct inode *inode) |
@@ -1598,215 +1492,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1598 | return; | 1492 | return; |
1599 | } | 1493 | } |
1600 | 1494 | ||
1601 | /* | ||
1602 | * mpage_da_map_and_submit - go through given space, map them | ||
1603 | * if necessary, and then submit them for I/O | ||
1604 | * | ||
1605 | * @mpd - bh describing space | ||
1606 | * | ||
1607 | * The function skips space we know is already mapped to disk blocks. | ||
1608 | * | ||
1609 | */ | ||
1610 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | ||
1611 | { | ||
1612 | int err, blks, get_blocks_flags; | ||
1613 | struct ext4_map_blocks map, *mapp = NULL; | ||
1614 | sector_t next = mpd->b_blocknr; | ||
1615 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
1616 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
1617 | handle_t *handle = NULL; | ||
1618 | |||
1619 | /* | ||
1620 | * If the blocks are mapped already, or we couldn't accumulate | ||
1621 | * any blocks, then proceed immediately to the submission stage. | ||
1622 | */ | ||
1623 | if ((mpd->b_size == 0) || | ||
1624 | ((mpd->b_state & (1 << BH_Mapped)) && | ||
1625 | !(mpd->b_state & (1 << BH_Delay)) && | ||
1626 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
1627 | goto submit_io; | ||
1628 | |||
1629 | handle = ext4_journal_current_handle(); | ||
1630 | BUG_ON(!handle); | ||
1631 | |||
1632 | /* | ||
1633 | * Call ext4_map_blocks() to allocate any delayed allocation | ||
1634 | * blocks, or to convert an uninitialized extent to be | ||
1635 | * initialized (in the case where we have written into | ||
1636 | * one or more preallocated blocks). | ||
1637 | * | ||
1638 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
1639 | * indicate that we are on the delayed allocation path. This | ||
1640 | * affects functions in many different parts of the allocation | ||
1641 | * call path. This flag exists primarily because we don't | ||
1642 | * want to change *many* call functions, so ext4_map_blocks() | ||
1643 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | ||
1644 | * inode's allocation semaphore is taken. | ||
1645 | * | ||
1646 | * If the blocks in questions were delalloc blocks, set | ||
1647 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
1648 | * variables are updated after the blocks have been allocated. | ||
1649 | */ | ||
1650 | map.m_lblk = next; | ||
1651 | map.m_len = max_blocks; | ||
1652 | /* | ||
1653 | * We're in delalloc path and it is possible that we're going to | ||
1654 | * need more metadata blocks than previously reserved. However | ||
1655 | * we must not fail because we're in writeback and there is | ||
1656 | * nothing we can do about it so it might result in data loss. | ||
1657 | * So use reserved blocks to allocate metadata if possible. | ||
1658 | */ | ||
1659 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
1660 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
1661 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
1662 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
1663 | if (mpd->b_state & (1 << BH_Delay)) | ||
1664 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
1665 | |||
1666 | |||
1667 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | ||
1668 | if (blks < 0) { | ||
1669 | struct super_block *sb = mpd->inode->i_sb; | ||
1670 | |||
1671 | err = blks; | ||
1672 | /* | ||
1673 | * If get block returns EAGAIN or ENOSPC and there | ||
1674 | * appears to be free blocks we will just let | ||
1675 | * mpage_da_submit_io() unlock all of the pages. | ||
1676 | */ | ||
1677 | if (err == -EAGAIN) | ||
1678 | goto submit_io; | ||
1679 | |||
1680 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { | ||
1681 | mpd->retval = err; | ||
1682 | goto submit_io; | ||
1683 | } | ||
1684 | |||
1685 | /* | ||
1686 | * get block failure will cause us to loop in | ||
1687 | * writepages, because a_ops->writepage won't be able | ||
1688 | * to make progress. The page will be redirtied by | ||
1689 | * writepage and writepages will again try to write | ||
1690 | * the same. | ||
1691 | */ | ||
1692 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
1693 | ext4_msg(sb, KERN_CRIT, | ||
1694 | "delayed block allocation failed for inode %lu " | ||
1695 | "at logical offset %llu with max blocks %zd " | ||
1696 | "with error %d", mpd->inode->i_ino, | ||
1697 | (unsigned long long) next, | ||
1698 | mpd->b_size >> mpd->inode->i_blkbits, err); | ||
1699 | ext4_msg(sb, KERN_CRIT, | ||
1700 | "This should not happen!! Data will be lost"); | ||
1701 | if (err == -ENOSPC) | ||
1702 | ext4_print_free_blocks(mpd->inode); | ||
1703 | } | ||
1704 | /* invalidate all the pages */ | ||
1705 | ext4_da_block_invalidatepages(mpd); | ||
1706 | |||
1707 | /* Mark this page range as having been completed */ | ||
1708 | mpd->io_done = 1; | ||
1709 | return; | ||
1710 | } | ||
1711 | BUG_ON(blks == 0); | ||
1712 | |||
1713 | mapp = &map; | ||
1714 | if (map.m_flags & EXT4_MAP_NEW) { | ||
1715 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
1716 | int i; | ||
1717 | |||
1718 | for (i = 0; i < map.m_len; i++) | ||
1719 | unmap_underlying_metadata(bdev, map.m_pblk + i); | ||
1720 | } | ||
1721 | |||
1722 | /* | ||
1723 | * Update on-disk size along with block allocation. | ||
1724 | */ | ||
1725 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
1726 | if (disksize > i_size_read(mpd->inode)) | ||
1727 | disksize = i_size_read(mpd->inode); | ||
1728 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
1729 | ext4_update_i_disksize(mpd->inode, disksize); | ||
1730 | err = ext4_mark_inode_dirty(handle, mpd->inode); | ||
1731 | if (err) | ||
1732 | ext4_error(mpd->inode->i_sb, | ||
1733 | "Failed to mark inode %lu dirty", | ||
1734 | mpd->inode->i_ino); | ||
1735 | } | ||
1736 | |||
1737 | submit_io: | ||
1738 | mpage_da_submit_io(mpd, mapp); | ||
1739 | mpd->io_done = 1; | ||
1740 | } | ||
1741 | |||
1742 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | ||
1743 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1744 | |||
1745 | /* | ||
1746 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
1747 | * | ||
1748 | * @mpd->lbh - extent of blocks | ||
1749 | * @logical - logical number of the block in the file | ||
1750 | * @b_state - b_state of the buffer head added | ||
1751 | * | ||
1752 | * the function is used to collect contig. blocks in same state | ||
1753 | */ | ||
1754 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, | ||
1755 | unsigned long b_state) | ||
1756 | { | ||
1757 | sector_t next; | ||
1758 | int blkbits = mpd->inode->i_blkbits; | ||
1759 | int nrblocks = mpd->b_size >> blkbits; | ||
1760 | |||
1761 | /* | ||
1762 | * XXX Don't go larger than mballoc is willing to allocate | ||
1763 | * This is a stopgap solution. We eventually need to fold | ||
1764 | * mpage_da_submit_io() into this function and then call | ||
1765 | * ext4_map_blocks() multiple times in a loop | ||
1766 | */ | ||
1767 | if (nrblocks >= (8*1024*1024 >> blkbits)) | ||
1768 | goto flush_it; | ||
1769 | |||
1770 | /* check if the reserved journal credits might overflow */ | ||
1771 | if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) { | ||
1772 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1773 | /* | ||
1774 | * With non-extent format we are limited by the journal | ||
1775 | * credit available. Total credit needed to insert | ||
1776 | * nrblocks contiguous blocks is dependent on the | ||
1777 | * nrblocks. So limit nrblocks. | ||
1778 | */ | ||
1779 | goto flush_it; | ||
1780 | } | ||
1781 | } | ||
1782 | /* | ||
1783 | * First block in the extent | ||
1784 | */ | ||
1785 | if (mpd->b_size == 0) { | ||
1786 | mpd->b_blocknr = logical; | ||
1787 | mpd->b_size = 1 << blkbits; | ||
1788 | mpd->b_state = b_state & BH_FLAGS; | ||
1789 | return; | ||
1790 | } | ||
1791 | |||
1792 | next = mpd->b_blocknr + nrblocks; | ||
1793 | /* | ||
1794 | * Can we merge the block to our big extent? | ||
1795 | */ | ||
1796 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | ||
1797 | mpd->b_size += 1 << blkbits; | ||
1798 | return; | ||
1799 | } | ||
1800 | |||
1801 | flush_it: | ||
1802 | /* | ||
1803 | * We couldn't merge the block to our extent, so we | ||
1804 | * need to flush current extent and start new one | ||
1805 | */ | ||
1806 | mpage_da_map_and_submit(mpd); | ||
1807 | return; | ||
1808 | } | ||
1809 | |||
1810 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1495 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
1811 | { | 1496 | { |
1812 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1497 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
@@ -2204,6 +1889,8 @@ static int ext4_writepage(struct page *page,
2204 | return ret; | 1889 | return ret; |
2205 | } | 1890 | } |
2206 | 1891 | ||
1892 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | ||
1893 | |||
2207 | /* | 1894 | /* |
2208 | * mballoc gives us at most this number of blocks... | 1895 | * mballoc gives us at most this number of blocks... |
2209 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). | 1896 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
@@ -2212,6 +1899,315 @@ static int ext4_writepage(struct page *page,
2212 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | 1899 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 |
2213 | 1900 | ||
2214 | /* | 1901 | /* |
1902 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | ||
1903 | * | ||
1904 | * @mpd - extent of blocks | ||
1905 | * @lblk - logical number of the block in the file | ||
1906 | * @b_state - b_state of the buffer head added | ||
1907 | * | ||
1908 | * the function is used to collect contig. blocks in same state | ||
1909 | */ | ||
1910 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | ||
1911 | unsigned long b_state) | ||
1912 | { | ||
1913 | struct ext4_map_blocks *map = &mpd->map; | ||
1914 | |||
1915 | /* Don't go larger than mballoc is willing to allocate */ | ||
1916 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
1917 | return 0; | ||
1918 | |||
1919 | /* First block in the extent? */ | ||
1920 | if (map->m_len == 0) { | ||
1921 | map->m_lblk = lblk; | ||
1922 | map->m_len = 1; | ||
1923 | map->m_flags = b_state & BH_FLAGS; | ||
1924 | return 1; | ||
1925 | } | ||
1926 | |||
1927 | /* Can we merge the block to our big extent? */ | ||
1928 | if (lblk == map->m_lblk + map->m_len && | ||
1929 | (b_state & BH_FLAGS) == map->m_flags) { | ||
1930 | map->m_len++; | ||
1931 | return 1; | ||
1932 | } | ||
1933 | return 0; | ||
1934 | } | ||
1935 | |||
1936 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | ||
1937 | struct buffer_head *head, | ||
1938 | struct buffer_head *bh, | ||
1939 | ext4_lblk_t lblk) | ||
1940 | { | ||
1941 | struct inode *inode = mpd->inode; | ||
1942 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
1943 | >> inode->i_blkbits; | ||
1944 | |||
1945 | do { | ||
1946 | BUG_ON(buffer_locked(bh)); | ||
1947 | |||
1948 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | ||
1949 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
1950 | lblk >= blocks) { | ||
1951 | /* Found extent to map? */ | ||
1952 | if (mpd->map.m_len) | ||
1953 | return false; | ||
1954 | if (lblk >= blocks) | ||
1955 | return true; | ||
1956 | continue; | ||
1957 | } | ||
1958 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
1959 | return false; | ||
1960 | } while (lblk++, (bh = bh->b_this_page) != head); | ||
1961 | return true; | ||
1962 | } | ||
1963 | |||
1964 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
1965 | { | ||
1966 | int len; | ||
1967 | loff_t size = i_size_read(mpd->inode); | ||
1968 | int err; | ||
1969 | |||
1970 | BUG_ON(page->index != mpd->first_page); | ||
1971 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1972 | len = size & ~PAGE_CACHE_MASK; | ||
1973 | else | ||
1974 | len = PAGE_CACHE_SIZE; | ||
1975 | clear_page_dirty_for_io(page); | ||
1976 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1977 | if (!err) | ||
1978 | mpd->wbc->nr_to_write--; | ||
1979 | mpd->first_page++; | ||
1980 | |||
1981 | return err; | ||
1982 | } | ||
1983 | |||
1984 | /* | ||
1985 | * mpage_map_buffers - update buffers corresponding to changed extent and | ||
1986 | * submit fully mapped pages for IO | ||
1987 | * | ||
1988 | * @mpd - description of extent to map, on return next extent to map | ||
1989 | * | ||
1990 | * Scan buffers corresponding to changed extent (we expect corresponding pages | ||
1991 | * to be already locked) and update buffer state according to new extent state. | ||
1992 | * We map delalloc buffers to their physical location, clear unwritten bits, | ||
1993 | * and mark buffers as uninit when we perform writes to uninitialized extents | ||
1994 | * and do extent conversion after IO is finished. If the last page is not fully | ||
1995 | * mapped, we update @map to the next extent in the last page that needs | ||
1996 | * mapping. Otherwise we submit the page for IO. | ||
1997 | */ | ||
1998 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | ||
1999 | { | ||
2000 | struct pagevec pvec; | ||
2001 | int nr_pages, i; | ||
2002 | struct inode *inode = mpd->inode; | ||
2003 | struct buffer_head *head, *bh; | ||
2004 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | ||
2005 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
2006 | >> inode->i_blkbits; | ||
2007 | pgoff_t start, end; | ||
2008 | ext4_lblk_t lblk; | ||
2009 | sector_t pblock; | ||
2010 | int err; | ||
2011 | |||
2012 | start = mpd->map.m_lblk >> bpp_bits; | ||
2013 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | ||
2014 | lblk = start << bpp_bits; | ||
2015 | pblock = mpd->map.m_pblk; | ||
2016 | |||
2017 | pagevec_init(&pvec, 0); | ||
2018 | while (start <= end) { | ||
2019 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | ||
2020 | PAGEVEC_SIZE); | ||
2021 | if (nr_pages == 0) | ||
2022 | break; | ||
2023 | for (i = 0; i < nr_pages; i++) { | ||
2024 | struct page *page = pvec.pages[i]; | ||
2025 | |||
2026 | if (page->index > end) | ||
2027 | break; | ||
2028 | /* Upto 'end' pages must be contiguous */ | ||
2029 | BUG_ON(page->index != start); | ||
2030 | bh = head = page_buffers(page); | ||
2031 | do { | ||
2032 | if (lblk < mpd->map.m_lblk) | ||
2033 | continue; | ||
2034 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | ||
2035 | /* | ||
2036 | * Buffer after end of mapped extent. | ||
2037 | * Find next buffer in the page to map. | ||
2038 | */ | ||
2039 | mpd->map.m_len = 0; | ||
2040 | mpd->map.m_flags = 0; | ||
2041 | add_page_bufs_to_extent(mpd, head, bh, | ||
2042 | lblk); | ||
2043 | pagevec_release(&pvec); | ||
2044 | return 0; | ||
2045 | } | ||
2046 | if (buffer_delay(bh)) { | ||
2047 | clear_buffer_delay(bh); | ||
2048 | bh->b_blocknr = pblock++; | ||
2049 | } | ||
2050 | if (mpd->map.m_flags & EXT4_MAP_UNINIT) | ||
2051 | set_buffer_uninit(bh); | ||
2052 | clear_buffer_unwritten(bh); | ||
2053 | } while (++lblk < blocks && | ||
2054 | (bh = bh->b_this_page) != head); | ||
2055 | |||
2056 | /* | ||
2057 | * FIXME: This is going to break if dioread_nolock | ||
2058 | * supports blocksize < pagesize as we will try to | ||
2059 | * convert potentially unmapped parts of inode. | ||
2060 | */ | ||
2061 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | ||
2062 | /* Page fully mapped - let IO run! */ | ||
2063 | err = mpage_submit_page(mpd, page); | ||
2064 | if (err < 0) { | ||
2065 | pagevec_release(&pvec); | ||
2066 | return err; | ||
2067 | } | ||
2068 | start++; | ||
2069 | } | ||
2070 | pagevec_release(&pvec); | ||
2071 | } | ||
2072 | /* Extent fully mapped and matches with page boundary. We are done. */ | ||
2073 | mpd->map.m_len = 0; | ||
2074 | mpd->map.m_flags = 0; | ||
2075 | return 0; | ||
2076 | } | ||
2077 | |||
2078 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | ||
2079 | { | ||
2080 | struct inode *inode = mpd->inode; | ||
2081 | struct ext4_map_blocks *map = &mpd->map; | ||
2082 | int get_blocks_flags; | ||
2083 | int err; | ||
2084 | |||
2085 | trace_ext4_da_write_pages_extent(inode, map); | ||
2086 | /* | ||
2087 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or | ||
2088 | * to convert an uninitialized extent to be initialized (in the case | ||
2089 | * where we have written into one or more preallocated blocks). It is | ||
2090 | * possible that we're going to need more metadata blocks than | ||
2091 | * previously reserved. However we must not fail because we're in | ||
2092 | * writeback and there is nothing we can do about it so it might result | ||
2093 | * in data loss. So use reserved blocks to allocate metadata if | ||
2094 | * possible. | ||
2095 | * | ||
2096 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | ||
2097 | * in question are delalloc blocks. This affects functions in many | ||
2098 | * different parts of the allocation call path. This flag exists | ||
2099 | * primarily because we don't want to change *many* call functions, so | ||
2100 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
2101 | * once the inode's allocation semaphore is taken. | ||
2102 | */ | ||
2103 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
2104 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
2105 | if (ext4_should_dioread_nolock(inode)) | ||
2106 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2107 | if (map->m_flags & (1 << BH_Delay)) | ||
2108 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
2109 | |||
2110 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); | ||
2111 | if (err < 0) | ||
2112 | return err; | ||
2113 | |||
2114 | BUG_ON(map->m_len == 0); | ||
2115 | if (map->m_flags & EXT4_MAP_NEW) { | ||
2116 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2117 | int i; | ||
2118 | |||
2119 | for (i = 0; i < map->m_len; i++) | ||
2120 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
2121 | } | ||
2122 | return 0; | ||
2123 | } | ||
2124 | |||
2125 | /* | ||
2126 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length | ||
2127 | * mpd->len and submit pages underlying it for IO | ||
2128 | * | ||
2129 | * @handle - handle for journal operations | ||
2130 | * @mpd - extent to map | ||
2131 | * | ||
2132 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is | ||
2133 | * delayed, blocks are allocated, if it is unwritten, we may need to convert | ||
2134 | * them to initialized or split the described range from larger unwritten | ||
2135 | * extent. Note that we need not map all the described range since allocation | ||
2136 | * can return less blocks or the range is covered by more unwritten extents. We | ||
2137 | * cannot map more because we are limited by reserved transaction credits. On | ||
2138 | * the other hand we always make sure that the last touched page is fully | ||
2139 | * mapped so that it can be written out (and thus forward progress is | ||
2140 | * guaranteed). After mapping we submit all mapped pages for IO. | ||
2141 | */ | ||
2142 | static int mpage_map_and_submit_extent(handle_t *handle, | ||
2143 | struct mpage_da_data *mpd) | ||
2144 | { | ||
2145 | struct inode *inode = mpd->inode; | ||
2146 | struct ext4_map_blocks *map = &mpd->map; | ||
2147 | int err; | ||
2148 | loff_t disksize; | ||
2149 | |||
2150 | mpd->io_submit.io_end->offset = | ||
2151 | ((loff_t)map->m_lblk) << inode->i_blkbits; | ||
2152 | while (map->m_len) { | ||
2153 | err = mpage_map_one_extent(handle, mpd); | ||
2154 | if (err < 0) { | ||
2155 | struct super_block *sb = inode->i_sb; | ||
2156 | |||
2157 | /* | ||
2158 | * Need to commit transaction to free blocks. Let upper | ||
2159 | * layers sort it out. | ||
2160 | */ | ||
2161 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) | ||
2162 | return -ENOSPC; | ||
2163 | |||
2164 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
2165 | ext4_msg(sb, KERN_CRIT, | ||
2166 | "Delayed block allocation failed for " | ||
2167 | "inode %lu at logical offset %llu with" | ||
2168 | " max blocks %u with error %d", | ||
2169 | inode->i_ino, | ||
2170 | (unsigned long long)map->m_lblk, | ||
2171 | (unsigned)map->m_len, err); | ||
2172 | ext4_msg(sb, KERN_CRIT, | ||
2173 | "This should not happen!! Data will " | ||
2174 | "be lost\n"); | ||
2175 | if (err == -ENOSPC) | ||
2176 | ext4_print_free_blocks(inode); | ||
2177 | } | ||
2178 | /* invalidate all the pages */ | ||
2179 | mpage_release_unused_pages(mpd, true); | ||
2180 | return err; | ||
2181 | } | ||
2182 | /* | ||
2183 | * Update buffer state, submit mapped pages, and get us new | ||
2184 | * extent to map | ||
2185 | */ | ||
2186 | err = mpage_map_and_submit_buffers(mpd); | ||
2187 | if (err < 0) | ||
2188 | return err; | ||
2189 | } | ||
2190 | |||
2191 | /* Update on-disk size after IO is submitted */ | ||
2192 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | ||
2193 | if (disksize > i_size_read(inode)) | ||
2194 | disksize = i_size_read(inode); | ||
2195 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2196 | int err2; | ||
2197 | |||
2198 | ext4_update_i_disksize(inode, disksize); | ||
2199 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2200 | if (err2) | ||
2201 | ext4_error(inode->i_sb, | ||
2202 | "Failed to mark inode %lu dirty", | ||
2203 | inode->i_ino); | ||
2204 | if (!err) | ||
2205 | err = err2; | ||
2206 | } | ||
2207 | return err; | ||
2208 | } | ||
2209 | |||
2210 | /* | ||
2215 | * Calculate the total number of credits to reserve for one writepages | 2211 | * Calculate the total number of credits to reserve for one writepages |
2216 | * iteration. This is called from ext4_da_writepages(). We map an extent of | 2212 | * iteration. This is called from ext4_da_writepages(). We map an extent of |
2217 | * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | 2213 | * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping |
@@ -2227,44 +2223,49 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2227 | } | 2223 | } |
2228 | 2224 | ||
2229 | /* | 2225 | /* |
2230 | * write_cache_pages_da - walk the list of dirty pages of the given | 2226 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages |
2231 | * address space and accumulate pages that need writing, and call | 2227 | * and underlying extent to map |
2232 | * mpage_da_map_and_submit to map a single contiguous memory region | 2228 | * |
2233 | * and then write them. | 2229 | * @mpd - where to look for pages |
2230 | * | ||
2231 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | ||
2232 | * IO immediately. When we find a page which isn't mapped we start accumulating | ||
2233 | * extent of buffers underlying these pages that needs mapping (formed by | ||
2234 | * either delayed or unwritten buffers). We also lock the pages containing | ||
2235 | * these buffers. The extent found is returned in @mpd structure (starting at | ||
2236 | * mpd->lblk with length mpd->len blocks). | ||
2237 | * | ||
2238 | * Note that this function can attach bios to one io_end structure which are | ||
2239 | * neither logically nor physically contiguous. Although it may seem as an | ||
2240 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | ||
2241 | * case as we need to track IO to all buffers underlying a page in one io_end. | ||
2234 | */ | 2242 | */ |
2235 | static int write_cache_pages_da(handle_t *handle, | 2243 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) |
2236 | struct address_space *mapping, | ||
2237 | struct writeback_control *wbc, | ||
2238 | struct mpage_da_data *mpd, | ||
2239 | pgoff_t *done_index) | ||
2240 | { | 2244 | { |
2241 | struct buffer_head *bh, *head; | 2245 | struct address_space *mapping = mpd->inode->i_mapping; |
2242 | struct inode *inode = mapping->host; | 2246 | struct pagevec pvec; |
2243 | struct pagevec pvec; | 2247 | unsigned int nr_pages; |
2244 | unsigned int nr_pages; | 2248 | pgoff_t index = mpd->first_page; |
2245 | sector_t logical; | 2249 | pgoff_t end = mpd->last_page; |
2246 | pgoff_t index, end; | 2250 | int tag; |
2247 | long nr_to_write = wbc->nr_to_write; | 2251 | int i, err = 0; |
2248 | int i, tag, ret = 0; | 2252 | int blkbits = mpd->inode->i_blkbits; |
2249 | 2253 | ext4_lblk_t lblk; | |
2250 | memset(mpd, 0, sizeof(struct mpage_da_data)); | 2254 | struct buffer_head *head; |
2251 | mpd->wbc = wbc; | ||
2252 | mpd->inode = inode; | ||
2253 | pagevec_init(&pvec, 0); | ||
2254 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2255 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2256 | 2255 | ||
2257 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2256 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) |
2258 | tag = PAGECACHE_TAG_TOWRITE; | 2257 | tag = PAGECACHE_TAG_TOWRITE; |
2259 | else | 2258 | else |
2260 | tag = PAGECACHE_TAG_DIRTY; | 2259 | tag = PAGECACHE_TAG_DIRTY; |
2261 | 2260 | ||
2262 | *done_index = index; | 2261 | pagevec_init(&pvec, 0); |
2262 | mpd->map.m_len = 0; | ||
2263 | mpd->next_page = index; | ||
2263 | while (index <= end) { | 2264 | while (index <= end) { |
2264 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2265 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2265 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2266 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2266 | if (nr_pages == 0) | 2267 | if (nr_pages == 0) |
2267 | return 0; | 2268 | goto out; |
2268 | 2269 | ||
2269 | for (i = 0; i < nr_pages; i++) { | 2270 | for (i = 0; i < nr_pages; i++) { |
2270 | struct page *page = pvec.pages[i]; | 2271 | struct page *page = pvec.pages[i]; |
@@ -2279,31 +2280,21 @@ static int write_cache_pages_da(handle_t *handle,
2279 | if (page->index > end) | 2280 | if (page->index > end) |
2280 | goto out; | 2281 | goto out; |
2281 | 2282 | ||
2282 | *done_index = page->index + 1; | 2283 | /* If we can't merge this page, we are done. */ |
2283 | 2284 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | |
2284 | /* | 2285 | goto out; |
2285 | * If we can't merge this page, and we have | ||
2286 | * accumulated an contiguous region, write it | ||
2287 | */ | ||
2288 | if ((mpd->next_page != page->index) && | ||
2289 | (mpd->next_page != mpd->first_page)) { | ||
2290 | mpage_da_map_and_submit(mpd); | ||
2291 | goto ret_extent_tail; | ||
2292 | } | ||
2293 | 2286 | ||
2294 | lock_page(page); | 2287 | lock_page(page); |
2295 | |||
2296 | /* | 2288 | /* |
2297 | * If the page is no longer dirty, or its | 2289 | * If the page is no longer dirty, or its mapping no |
2298 | * mapping no longer corresponds to inode we | 2290 | * longer corresponds to inode we are writing (which |
2299 | * are writing (which means it has been | 2291 | * means it has been truncated or invalidated), or the |
2300 | * truncated or invalidated), or the page is | 2292 | * page is already under writeback and we are not doing |
2301 | * already under writeback and we are not | 2293 | * a data integrity writeback, skip the page |
2302 | * doing a data integrity writeback, skip the page | ||
2303 | */ | 2294 | */ |
2304 | if (!PageDirty(page) || | 2295 | if (!PageDirty(page) || |
2305 | (PageWriteback(page) && | 2296 | (PageWriteback(page) && |
2306 | (wbc->sync_mode == WB_SYNC_NONE)) || | 2297 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
2307 | unlikely(page->mapping != mapping)) { | 2298 | unlikely(page->mapping != mapping)) { |
2308 | unlock_page(page); | 2299 | unlock_page(page); |
2309 | continue; | 2300 | continue; |
@@ -2312,101 +2303,57 @@ static int write_cache_pages_da(handle_t *handle,
2312 | wait_on_page_writeback(page); | 2303 | wait_on_page_writeback(page); |
2313 | BUG_ON(PageWriteback(page)); | 2304 | BUG_ON(PageWriteback(page)); |
2314 | 2305 | ||
2315 | /* | 2306 | if (mpd->map.m_len == 0) |
2316 | * If we have inline data and arrive here, it means that | ||
2317 | * we will soon create the block for the 1st page, so | ||
2318 | * we'd better clear the inline data here. | ||
2319 | */ | ||
2320 | if (ext4_has_inline_data(inode)) { | ||
2321 | BUG_ON(ext4_test_inode_state(inode, | ||
2322 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2323 | ext4_destroy_inline_data(handle, inode); | ||
2324 | } | ||
2325 | |||
2326 | if (mpd->next_page != page->index) | ||
2327 | mpd->first_page = page->index; | 2307 | mpd->first_page = page->index; |
2328 | mpd->next_page = page->index + 1; | 2308 | mpd->next_page = page->index + 1; |
2329 | logical = (sector_t) page->index << | ||
2330 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2331 | |||
2332 | /* Add all dirty buffers to mpd */ | 2309 | /* Add all dirty buffers to mpd */ |
2310 | lblk = ((ext4_lblk_t)page->index) << | ||
2311 | (PAGE_CACHE_SHIFT - blkbits); | ||
2333 | head = page_buffers(page); | 2312 | head = page_buffers(page); |
2334 | bh = head; | 2313 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) |
2335 | do { | 2314 | goto out; |
2336 | BUG_ON(buffer_locked(bh)); | 2315 | /* So far everything mapped? Submit the page for IO. */ |
2337 | /* | 2316 | if (mpd->map.m_len == 0) { |
2338 | * We need to try to allocate unmapped blocks | 2317 | err = mpage_submit_page(mpd, page); |
2339 | * in the same page. Otherwise we won't make | 2318 | if (err < 0) |
2340 | * progress with the page in ext4_writepage | ||
2341 | */ | ||
2342 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2343 | mpage_add_bh_to_extent(mpd, logical, | ||
2344 | bh->b_state); | ||
2345 | if (mpd->io_done) | ||
2346 | goto ret_extent_tail; | ||
2347 | } else if (buffer_dirty(bh) && | ||
2348 | buffer_mapped(bh)) { | ||
2349 | /* | ||
2350 | * mapped dirty buffer. We need to | ||
2351 | * update the b_state because we look | ||
2352 | * at b_state in mpage_da_map_blocks. | ||
2353 | * We don't update b_size because if we | ||
2354 | * find an unmapped buffer_head later | ||
2355 | * we need to use the b_state flag of | ||
2356 | * that buffer_head. | ||
2357 | */ | ||
2358 | if (mpd->b_size == 0) | ||
2359 | mpd->b_state = | ||
2360 | bh->b_state & BH_FLAGS; | ||
2361 | } | ||
2362 | logical++; | ||
2363 | } while ((bh = bh->b_this_page) != head); | ||
2364 | |||
2365 | if (nr_to_write > 0) { | ||
2366 | nr_to_write--; | ||
2367 | if (nr_to_write == 0 && | ||
2368 | wbc->sync_mode == WB_SYNC_NONE) | ||
2369 | /* | ||
2370 | * We stop writing back only if we are | ||
2371 | * not doing integrity sync. In case of | ||
2372 | * integrity sync we have to keep going | ||
2373 | * because someone may be concurrently | ||
2374 | * dirtying pages, and we might have | ||
2375 | * synced a lot of newly appeared dirty | ||
2376 | * pages, but have not synced all of the | ||
2377 | * old dirty pages. | ||
2378 | */ | ||
2379 | goto out; | 2319 | goto out; |
2380 | } | 2320 | } |
2321 | |||
2322 | /* | ||
2323 | * Accumulated enough dirty pages? This doesn't apply | ||
2324 | * to WB_SYNC_ALL mode. For integrity sync we have to | ||
2325 | * keep going because someone may be concurrently | ||
2326 | * dirtying pages, and we might have synced a lot of | ||
2327 | * newly appeared dirty pages, but have not synced all | ||
2328 | * of the old dirty pages. | ||
2329 | */ | ||
2330 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && | ||
2331 | mpd->next_page - mpd->first_page >= | ||
2332 | mpd->wbc->nr_to_write) | ||
2333 | goto out; | ||
2381 | } | 2334 | } |
2382 | pagevec_release(&pvec); | 2335 | pagevec_release(&pvec); |
2383 | cond_resched(); | 2336 | cond_resched(); |
2384 | } | 2337 | } |
2385 | return 0; | 2338 | return 0; |
2386 | ret_extent_tail: | ||
2387 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2388 | out: | 2339 | out: |
2389 | pagevec_release(&pvec); | 2340 | pagevec_release(&pvec); |
2390 | cond_resched(); | 2341 | return err; |
2391 | return ret; | ||
2392 | } | 2342 | } |
2393 | 2343 | ||
2394 | |||
2395 | static int ext4_da_writepages(struct address_space *mapping, | 2344 | static int ext4_da_writepages(struct address_space *mapping, |
2396 | struct writeback_control *wbc) | 2345 | struct writeback_control *wbc) |
2397 | { | 2346 | { |
2398 | pgoff_t index; | 2347 | pgoff_t writeback_index = 0; |
2348 | long nr_to_write = wbc->nr_to_write; | ||
2399 | int range_whole = 0; | 2349 | int range_whole = 0; |
2350 | int cycled = 1; | ||
2400 | handle_t *handle = NULL; | 2351 | handle_t *handle = NULL; |
2401 | struct mpage_da_data mpd; | 2352 | struct mpage_da_data mpd; |
2402 | struct inode *inode = mapping->host; | 2353 | struct inode *inode = mapping->host; |
2403 | int pages_written = 0; | ||
2404 | int range_cyclic, cycled = 1, io_done = 0; | ||
2405 | int needed_blocks, ret = 0; | 2354 | int needed_blocks, ret = 0; |
2406 | loff_t range_start = wbc->range_start; | ||
2407 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2355 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2408 | pgoff_t done_index = 0; | 2356 | bool done; |
2409 | pgoff_t end; | ||
2410 | struct blk_plug plug; | 2357 | struct blk_plug plug; |
2411 | 2358 | ||
2412 | trace_ext4_da_writepages(inode, wbc); | 2359 | trace_ext4_da_writepages(inode, wbc); |
@@ -2432,40 +2379,65 @@ static int ext4_da_writepages(struct address_space *mapping,
2432 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2379 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2433 | return -EROFS; | 2380 | return -EROFS; |
2434 | 2381 | ||
2382 | /* | ||
2383 | * If we have inline data and arrive here, it means that | ||
2384 | * we will soon create the block for the 1st page, so | ||
2385 | * we'd better clear the inline data here. | ||
2386 | */ | ||
2387 | if (ext4_has_inline_data(inode)) { | ||
2388 | /* Just inode will be modified... */ | ||
2389 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
2390 | if (IS_ERR(handle)) { | ||
2391 | ret = PTR_ERR(handle); | ||
2392 | goto out_writepages; | ||
2393 | } | ||
2394 | BUG_ON(ext4_test_inode_state(inode, | ||
2395 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2396 | ext4_destroy_inline_data(handle, inode); | ||
2397 | ext4_journal_stop(handle); | ||
2398 | } | ||
2399 | |||
2435 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2400 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2436 | range_whole = 1; | 2401 | range_whole = 1; |
2437 | 2402 | ||
2438 | range_cyclic = wbc->range_cyclic; | ||
2439 | if (wbc->range_cyclic) { | 2403 | if (wbc->range_cyclic) { |
2440 | index = mapping->writeback_index; | 2404 | writeback_index = mapping->writeback_index; |
2441 | if (index) | 2405 | if (writeback_index) |
2442 | cycled = 0; | 2406 | cycled = 0; |
2443 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2407 | mpd.first_page = writeback_index; |
2444 | wbc->range_end = LLONG_MAX; | 2408 | mpd.last_page = -1; |
2445 | wbc->range_cyclic = 0; | ||
2446 | end = -1; | ||
2447 | } else { | 2409 | } else { |
2448 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2410 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
2449 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2411 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
2450 | } | 2412 | } |
2451 | 2413 | ||
2414 | mpd.inode = inode; | ||
2415 | mpd.wbc = wbc; | ||
2416 | ext4_io_submit_init(&mpd.io_submit, wbc); | ||
2452 | retry: | 2417 | retry: |
2453 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2418 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2454 | tag_pages_for_writeback(mapping, index, end); | 2419 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
2455 | 2420 | done = false; | |
2456 | blk_start_plug(&plug); | 2421 | blk_start_plug(&plug); |
2457 | while (!ret && wbc->nr_to_write > 0) { | 2422 | while (!done && mpd.first_page <= mpd.last_page) { |
2423 | /* For each extent of pages we use new io_end */ | ||
2424 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | ||
2425 | if (!mpd.io_submit.io_end) { | ||
2426 | ret = -ENOMEM; | ||
2427 | break; | ||
2428 | } | ||
2458 | 2429 | ||
2459 | /* | 2430 | /* |
2460 | * we insert one extent at a time. So we need | 2431 | * We have two constraints: We find one extent to map and we |
2461 | * credit needed for single extent allocation. | 2432 | * must always write out whole page (makes a difference when |
2462 | * journalled mode is currently not supported | 2433 | * blocksize < pagesize) so that we don't block on IO when we |
2463 | * by delalloc | 2434 | * try to write out the rest of the page. Journalled mode is |
2435 | * not supported by delalloc. | ||
2464 | */ | 2436 | */ |
2465 | BUG_ON(ext4_should_journal_data(inode)); | 2437 | BUG_ON(ext4_should_journal_data(inode)); |
2466 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2438 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2467 | 2439 | ||
2468 | /* start a new transaction*/ | 2440 | /* start a new transaction */ |
2469 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2441 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
2470 | needed_blocks); | 2442 | needed_blocks); |
2471 | if (IS_ERR(handle)) { | 2443 | if (IS_ERR(handle)) { |
@@ -2473,76 +2445,67 @@ retry:
2473 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2445 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2474 | "%ld pages, ino %lu; err %d", __func__, | 2446 | "%ld pages, ino %lu; err %d", __func__, |
2475 | wbc->nr_to_write, inode->i_ino, ret); | 2447 | wbc->nr_to_write, inode->i_ino, ret); |
2476 | blk_finish_plug(&plug); | 2448 | /* Release allocated io_end */ |
2477 | goto out_writepages; | 2449 | ext4_put_io_end(mpd.io_submit.io_end); |
2450 | break; | ||
2478 | } | 2451 | } |
2479 | 2452 | ||
2480 | /* | 2453 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
2481 | * Now call write_cache_pages_da() to find the next | 2454 | ret = mpage_prepare_extent_to_map(&mpd); |
2482 | * contiguous region of logical blocks that need | 2455 | if (!ret) { |
2483 | * blocks to be allocated by ext4 and submit them. | 2456 | if (mpd.map.m_len) |
2484 | */ | 2457 | ret = mpage_map_and_submit_extent(handle, &mpd); |
2485 | ret = write_cache_pages_da(handle, mapping, | 2458 | else { |
2486 | wbc, &mpd, &done_index); | 2459 | /* |
2487 | /* | 2460 | * We scanned the whole range (or exhausted |
2488 | * If we have a contiguous extent of pages and we | 2461 | * nr_to_write), submitted what was mapped and |
2489 | * haven't done the I/O yet, map the blocks and submit | 2462 | * didn't find anything needing mapping. We are |
2490 | * them for I/O. | 2463 | * done. |
2491 | */ | 2464 | */ |
2492 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2465 | done = true; |
2493 | mpage_da_map_and_submit(&mpd); | 2466 | } |
2494 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2495 | } | 2467 | } |
2496 | trace_ext4_da_write_pages(inode, &mpd); | ||
2497 | wbc->nr_to_write -= mpd.pages_written; | ||
2498 | |||
2499 | ext4_journal_stop(handle); | 2468 | ext4_journal_stop(handle); |
2500 | 2469 | /* Submit prepared bio */ | |
2501 | if ((mpd.retval == -ENOSPC) && sbi->s_journal) { | 2470 | ext4_io_submit(&mpd.io_submit); |
2502 | /* commit the transaction which would | 2471 | /* Unlock pages we didn't use */ |
2472 | mpage_release_unused_pages(&mpd, false); | ||
2473 | /* Drop our io_end reference we got from init */ | ||
2474 | ext4_put_io_end(mpd.io_submit.io_end); | ||
2475 | |||
2476 | if (ret == -ENOSPC && sbi->s_journal) { | ||
2477 | /* | ||
2478 | * Commit the transaction which would | ||
2503 | * free blocks released in the transaction | 2479 | * free blocks released in the transaction |
2504 | * and try again | 2480 | * and try again |
2505 | */ | 2481 | */ |
2506 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2482 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2507 | ret = 0; | 2483 | ret = 0; |
2508 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2484 | continue; |
2509 | /* | 2485 | } |
2510 | * Got one extent now try with rest of the pages. | 2486 | /* Fatal error - ENOMEM, EIO... */ |
2511 | * If mpd.retval is set -EIO, journal is aborted. | 2487 | if (ret) |
2512 | * So we don't need to write any more. | ||
2513 | */ | ||
2514 | pages_written += mpd.pages_written; | ||
2515 | ret = mpd.retval; | ||
2516 | io_done = 1; | ||
2517 | } else if (wbc->nr_to_write) | ||
2518 | /* | ||
2519 | * There is no more writeout needed | ||
2520 | * or we requested for a noblocking writeout | ||
2521 | * and we found the device congested | ||
2522 | */ | ||
2523 | break; | 2488 | break; |
2524 | } | 2489 | } |
2525 | blk_finish_plug(&plug); | 2490 | blk_finish_plug(&plug); |
2526 | if (!io_done && !cycled) { | 2491 | if (!ret && !cycled) { |
2527 | cycled = 1; | 2492 | cycled = 1; |
2528 | index = 0; | 2493 | mpd.last_page = writeback_index - 1; |
2529 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2494 | mpd.first_page = 0; |
2530 | wbc->range_end = mapping->writeback_index - 1; | ||
2531 | goto retry; | 2495 | goto retry; |
2532 | } | 2496 | } |
2533 | 2497 | ||
2534 | /* Update index */ | 2498 | /* Update index */ |
2535 | wbc->range_cyclic = range_cyclic; | ||
2536 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2499 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2537 | /* | 2500 | /* |
2538 | * set the writeback_index so that range_cyclic | 2501 | * Set the writeback_index so that range_cyclic |
2539 | * mode will write it back later | 2502 | * mode will write it back later |
2540 | */ | 2503 | */ |
2541 | mapping->writeback_index = done_index; | 2504 | mapping->writeback_index = mpd.first_page; |
2542 | 2505 | ||
2543 | out_writepages: | 2506 | out_writepages: |
2544 | wbc->range_start = range_start; | 2507 | trace_ext4_da_writepages_result(inode, wbc, ret, |
2545 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2508 | nr_to_write - wbc->nr_to_write); |
2546 | return ret; | 2509 | return ret; |
2547 | } | 2510 | } |
2548 | 2511 | ||
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..3e5854625126 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
360 | bio->bi_bdev = bh->b_bdev; | 360 | bio->bi_bdev = bh->b_bdev; |
361 | bio->bi_end_io = ext4_end_bio; | 361 | bio->bi_end_io = ext4_end_bio; |
362 | bio->bi_private = ext4_get_io_end(io->io_end); | 362 | bio->bi_private = ext4_get_io_end(io->io_end); |
363 | if (!io->io_end->size) | ||
364 | io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT) | ||
365 | + bh_offset(bh); | ||
366 | io->io_bio = bio; | 363 | io->io_bio = bio; |
367 | io->io_next_block = bh->b_blocknr; | 364 | io->io_next_block = bh->b_blocknr; |
368 | return 0; | 365 | return 0; |
@@ -390,7 +387,6 @@ submit_and_retry:
390 | io_end = io->io_end; | 387 | io_end = io->io_end; |
391 | if (test_clear_buffer_uninit(bh)) | 388 | if (test_clear_buffer_uninit(bh)) |
392 | ext4_set_io_unwritten_flag(inode, io_end); | 389 | ext4_set_io_unwritten_flag(inode, io_end); |
393 | io_end->size += bh->b_size; | ||
394 | io->io_next_block++; | 390 | io->io_next_block++; |
395 | return 0; | 391 | return 0; |
396 | } | 392 | } |
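
The heart of the restructured path is the extent-collection rule in the new mpage_add_bh_to_extent() above: the first delayed/unwritten block opens mpd->map, and later blocks are merged only while they stay logically contiguous, carry the same BH_Delay/BH_Unwritten state, and the extent remains under MAX_WRITEPAGES_EXTENT_LEN; otherwise the caller maps and submits what it has collected and starts a new extent. A minimal userspace sketch of just that rule follows; struct extent, add_block() and the sample block numbers are invented for illustration, while the merge conditions and the length cap are taken from the patch:

/* gcc -Wall extent_sketch.c ; illustrative only, not code from this commit */
#include <stdio.h>

#define MAX_WRITEPAGES_EXTENT_LEN 2048	/* same cap as in the patch */

struct extent {
	unsigned long lblk;	/* first logical block of the extent */
	unsigned long len;	/* number of blocks, 0 means "empty" */
	unsigned int state;	/* stand-in for the BH_Delay/BH_Unwritten flags */
};

/* Mirror of the merge rule: returns 1 if the block was added, 0 if the
 * currently collected extent must be mapped and submitted first. */
static int add_block(struct extent *ext, unsigned long lblk, unsigned int state)
{
	if (ext->len >= MAX_WRITEPAGES_EXTENT_LEN)
		return 0;
	if (ext->len == 0) {			/* first block opens the extent */
		ext->lblk = lblk;
		ext->len = 1;
		ext->state = state;
		return 1;
	}
	if (lblk == ext->lblk + ext->len && state == ext->state) {
		ext->len++;			/* contiguous and same state: merge */
		return 1;
	}
	return 0;
}

int main(void)
{
	struct extent ext = { 0, 0, 0 };
	unsigned long blocks[] = { 10, 11, 12, 14 };	/* 14 breaks contiguity */
	unsigned int i;

	for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if (!add_block(&ext, blocks[i], 1)) {
			printf("map and submit extent [%lu, +%lu)\n",
			       ext.lblk, ext.len);
			ext.len = 0;			/* start a new extent... */
			add_block(&ext, blocks[i], 1);	/* ...with this block */
		}
	}
	printf("pending extent: start %lu, len %lu\n", ext.lblk, ext.len);
	return 0;
}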