Diffstat (limited to 'fs')
-rw-r--r--   fs/ext4/ext4.h    |   15
-rw-r--r--   fs/ext4/inode.c   | 1011
-rw-r--r--   fs/ext4/page-io.c |    4
3 files changed, 487 insertions, 543 deletions
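
The patch below restructures ext4's delayed-allocation writeback path: the old per-buffer accumulator in struct mpage_da_data (removed from ext4.h in the first hunk) is replaced by a page window plus a single struct ext4_map_blocks extent, and every extent gets its own io_end. As orientation for the inode.c hunks, here is a minimal stand-alone C model of the new ext4_da_writepages() loop; all helpers are simplified stand-ins that only mirror the control flow, not the kernel API.

/*
 * Minimal user-space model of the restructured writepages loop.  The
 * control flow follows ext4_da_writepages() as added below; the helpers
 * are stubs that only model return values, not real filesystem work.
 */
#include <stdbool.h>

typedef unsigned long pgoff_t;

struct map_blocks {
	unsigned int m_len;          /* 0 means "no extent found yet" */
};

struct mpd {
	pgoff_t first_page;          /* first page still to examine */
	pgoff_t last_page;           /* last page to examine */
	struct map_blocks map;       /* extent that needs block allocation */
};

/* Stand-ins for the ext4 helpers of the same role (assumed, simplified). */
static int  prepare_extent_to_map(struct mpd *m) { (void)m; return 0; }
static int  map_and_submit_extent(struct mpd *m) { (void)m; return 0; }
static void submit_io(struct mpd *m)             { (void)m; }
static void release_unused_pages(struct mpd *m)  { (void)m; }

int writepages_model(struct mpd *m)
{
	bool done = false;
	int ret = 0;

	while (!done && m->first_page <= m->last_page) {
		/* Real code: allocate a fresh io_end and start a transaction
		 * sized for mapping a single extent. */
		ret = prepare_extent_to_map(m);   /* lock dirty pages, find extent */
		if (ret == 0) {
			if (m->map.m_len)
				ret = map_and_submit_extent(m);
			else
				done = true;      /* range scanned, nothing to map */
		}
		/* Real code: stop the transaction here. */
		submit_io(m);                     /* flush the bio built so far */
		release_unused_pages(m);          /* unlock pages we skipped */
		if (ret)
			break;                    /* -ENOSPC retry handling elided */
	}
	return ret;
}

With the real helpers, mpage_prepare_extent_to_map() advances first_page and leaves the found extent in map, so the loop makes forward progress one mapped extent (or one run of already-mapped pages) per iteration.
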
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2ebfcde5a156..90a164f365c4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,21 +177,6 @@ struct ext4_map_blocks {
177}; 177};
178 178
179/* 179/*
180 * For delayed allocation tracking
181 */
182struct mpage_da_data {
183 struct inode *inode;
184 sector_t b_blocknr; /* start block number of extent */
185 size_t b_size; /* size of extent */
186 unsigned long b_state; /* state of the extent */
187 unsigned long first_page, next_page; /* extent of pages */
188 struct writeback_control *wbc;
189 int io_done;
190 int pages_written;
191 int retval;
192};
193
194/*
195 * Flags for ext4_io_end->flags 180 * Flags for ext4_io_end->flags
196 */ 181 */
197#define EXT4_IO_END_UNWRITTEN 0x0001 182#define EXT4_IO_END_UNWRITTEN 0x0001
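
For reference, the shape of the tracking structure before and after this patch: the old fields above accumulate a raw block run, while the new definition (moved into inode.c in the next hunk) keeps only the page window and the extent to map. Kernel types are stubbed so the comparison compiles on its own, field comments are abridged from the patch, and the _old/_new suffixes are added here only so both fit in one snippet.

/* Stand-alone comparison of the tracking structure before and after this
 * patch.  Kernel types are stubbed; comments abridged from the hunks
 * above and below. */
#include <stddef.h>

typedef unsigned long long sector_t;
typedef unsigned long pgoff_t;
struct inode;
struct writeback_control;
struct ext4_map_blocks { unsigned int m_lblk, m_len, m_flags; }; /* stub */
struct ext4_io_submit  { void *io_end; };                        /* stub */

struct mpage_da_data_old {                   /* removed from ext4.h above */
	struct inode *inode;
	sector_t b_blocknr;                  /* start block number of extent */
	size_t b_size;                       /* size of extent */
	unsigned long b_state;               /* state of the extent */
	unsigned long first_page, next_page; /* extent of pages */
	struct writeback_control *wbc;
	int io_done;
	int pages_written;
	int retval;
};

struct mpage_da_data_new {                   /* added to inode.c below */
	struct inode *inode;
	struct writeback_control *wbc;
	pgoff_t first_page;                  /* first page to write */
	pgoff_t next_page;                   /* current page to examine */
	pgoff_t last_page;                   /* last page to examine */
	struct ext4_map_blocks map;          /* extent to map; m_flags holds
	                                      * the delalloc/unwritten state */
	struct ext4_io_submit io_submit;     /* IO submission data */
};
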
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b777e51b677..5939a4742def 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1407,149 +1407,42 @@ static void ext4_da_page_release_reservation(struct page *page,
1407 * Delayed allocation stuff 1407 * Delayed allocation stuff
1408 */ 1408 */
1409 1409
1410static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd); 1410struct mpage_da_data {
1411 1411 struct inode *inode;
1412/* 1412 struct writeback_control *wbc;
1413 * mpage_da_submit_io - walks through extent of pages and try to write 1413 pgoff_t first_page; /* The first page to write */
1414 * them with writepage() call back 1414 pgoff_t next_page; /* Current page to examine */
1415 * 1415 pgoff_t last_page; /* Last page to examine */
1416 * @mpd->inode: inode
1417 * @mpd->first_page: first page of the extent
1418 * @mpd->next_page: page after the last page of the extent
1419 *
1420 * By the time mpage_da_submit_io() is called we expect all blocks
1421 * to be allocated. this may be wrong if allocation failed.
1422 *
1423 * As pages are already locked by write_cache_pages(), we can't use it
1424 */
1425static int mpage_da_submit_io(struct mpage_da_data *mpd,
1426 struct ext4_map_blocks *map)
1427{
1428 struct pagevec pvec;
1429 unsigned long index, end;
1430 int ret = 0, err, nr_pages, i;
1431 struct inode *inode = mpd->inode;
1432 struct address_space *mapping = inode->i_mapping;
1433 loff_t size = i_size_read(inode);
1434 unsigned int len, block_start;
1435 struct buffer_head *bh, *page_bufs = NULL;
1436 sector_t pblock = 0, cur_logical = 0;
1437 struct ext4_io_submit io_submit;
1438
1439 BUG_ON(mpd->next_page <= mpd->first_page);
1440 ext4_io_submit_init(&io_submit, mpd->wbc);
1441 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
1442 if (!io_submit.io_end) {
1443 ext4_da_block_invalidatepages(mpd);
1444 return -ENOMEM;
1445 }
1446 /* 1416 /*
1447 * We need to start from the first_page to the next_page - 1 1417 * Extent to map - this can be after first_page because that can be
1448 * to make sure we also write the mapped dirty buffer_heads. 1418 * fully mapped. We somewhat abuse m_flags to store whether the extent
1449 * If we look at mpd->b_blocknr we would only be looking 1419 * is delalloc or unwritten.
1450 * at the currently mapped buffer_heads.
1451 */ 1420 */
1452 index = mpd->first_page; 1421 struct ext4_map_blocks map;
1453 end = mpd->next_page - 1; 1422 struct ext4_io_submit io_submit; /* IO submission data */
1454 1423};
1455 pagevec_init(&pvec, 0);
1456 while (index <= end) {
1457 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1458 if (nr_pages == 0)
1459 break;
1460 for (i = 0; i < nr_pages; i++) {
1461 int skip_page = 0;
1462 struct page *page = pvec.pages[i];
1463
1464 index = page->index;
1465 if (index > end)
1466 break;
1467
1468 if (index == size >> PAGE_CACHE_SHIFT)
1469 len = size & ~PAGE_CACHE_MASK;
1470 else
1471 len = PAGE_CACHE_SIZE;
1472 if (map) {
1473 cur_logical = index << (PAGE_CACHE_SHIFT -
1474 inode->i_blkbits);
1475 pblock = map->m_pblk + (cur_logical -
1476 map->m_lblk);
1477 }
1478 index++;
1479
1480 BUG_ON(!PageLocked(page));
1481 BUG_ON(PageWriteback(page));
1482
1483 bh = page_bufs = page_buffers(page);
1484 block_start = 0;
1485 do {
1486 if (map && (cur_logical >= map->m_lblk) &&
1487 (cur_logical <= (map->m_lblk +
1488 (map->m_len - 1)))) {
1489 if (buffer_delay(bh)) {
1490 clear_buffer_delay(bh);
1491 bh->b_blocknr = pblock;
1492 }
1493 if (buffer_unwritten(bh) ||
1494 buffer_mapped(bh))
1495 BUG_ON(bh->b_blocknr != pblock);
1496 if (map->m_flags & EXT4_MAP_UNINIT)
1497 set_buffer_uninit(bh);
1498 clear_buffer_unwritten(bh);
1499 }
1500
1501 /*
1502 * skip page if block allocation undone and
1503 * block is dirty
1504 */
1505 if (ext4_bh_delay_or_unwritten(NULL, bh))
1506 skip_page = 1;
1507 bh = bh->b_this_page;
1508 block_start += bh->b_size;
1509 cur_logical++;
1510 pblock++;
1511 } while (bh != page_bufs);
1512
1513 if (skip_page) {
1514 unlock_page(page);
1515 continue;
1516 }
1517
1518 clear_page_dirty_for_io(page);
1519 err = ext4_bio_write_page(&io_submit, page, len,
1520 mpd->wbc);
1521 if (!err)
1522 mpd->pages_written++;
1523 /*
1524 * In error case, we have to continue because
1525 * remaining pages are still locked
1526 */
1527 if (ret == 0)
1528 ret = err;
1529 }
1530 pagevec_release(&pvec);
1531 }
1532 ext4_io_submit(&io_submit);
1533 /* Drop io_end reference we got from init */
1534 ext4_put_io_end_defer(io_submit.io_end);
1535 return ret;
1536}
1537 1424
1538static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) 1425static void mpage_release_unused_pages(struct mpage_da_data *mpd,
1426 bool invalidate)
1539{ 1427{
1540 int nr_pages, i; 1428 int nr_pages, i;
1541 pgoff_t index, end; 1429 pgoff_t index, end;
1542 struct pagevec pvec; 1430 struct pagevec pvec;
1543 struct inode *inode = mpd->inode; 1431 struct inode *inode = mpd->inode;
1544 struct address_space *mapping = inode->i_mapping; 1432 struct address_space *mapping = inode->i_mapping;
1545 ext4_lblk_t start, last; 1433
1434 /* This is necessary when next_page == 0. */
1435 if (mpd->first_page >= mpd->next_page)
1436 return;
1546 1437
1547 index = mpd->first_page; 1438 index = mpd->first_page;
1548 end = mpd->next_page - 1; 1439 end = mpd->next_page - 1;
1549 1440 if (invalidate) {
1550 start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 1441 ext4_lblk_t start, last;
1551 last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); 1442 start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1552 ext4_es_remove_extent(inode, start, last - start + 1); 1443 last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1444 ext4_es_remove_extent(inode, start, last - start + 1);
1445 }
1553 1446
1554 pagevec_init(&pvec, 0); 1447 pagevec_init(&pvec, 0);
1555 while (index <= end) { 1448 while (index <= end) {
@@ -1562,14 +1455,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
1562 break; 1455 break;
1563 BUG_ON(!PageLocked(page)); 1456 BUG_ON(!PageLocked(page));
1564 BUG_ON(PageWriteback(page)); 1457 BUG_ON(PageWriteback(page));
1565 block_invalidatepage(page, 0, PAGE_CACHE_SIZE); 1458 if (invalidate) {
1566 ClearPageUptodate(page); 1459 block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
1460 ClearPageUptodate(page);
1461 }
1567 unlock_page(page); 1462 unlock_page(page);
1568 } 1463 }
1569 index = pvec.pages[nr_pages - 1]->index + 1; 1464 index = pvec.pages[nr_pages - 1]->index + 1;
1570 pagevec_release(&pvec); 1465 pagevec_release(&pvec);
1571 } 1466 }
1572 return;
1573} 1467}
1574 1468
1575static void ext4_print_free_blocks(struct inode *inode) 1469static void ext4_print_free_blocks(struct inode *inode)
@@ -1598,215 +1492,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1598 return; 1492 return;
1599} 1493}
1600 1494
1601/*
1602 * mpage_da_map_and_submit - go through given space, map them
1603 * if necessary, and then submit them for I/O
1604 *
1605 * @mpd - bh describing space
1606 *
1607 * The function skips space we know is already mapped to disk blocks.
1608 *
1609 */
1610static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1611{
1612 int err, blks, get_blocks_flags;
1613 struct ext4_map_blocks map, *mapp = NULL;
1614 sector_t next = mpd->b_blocknr;
1615 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
1616 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
1617 handle_t *handle = NULL;
1618
1619 /*
1620 * If the blocks are mapped already, or we couldn't accumulate
1621 * any blocks, then proceed immediately to the submission stage.
1622 */
1623 if ((mpd->b_size == 0) ||
1624 ((mpd->b_state & (1 << BH_Mapped)) &&
1625 !(mpd->b_state & (1 << BH_Delay)) &&
1626 !(mpd->b_state & (1 << BH_Unwritten))))
1627 goto submit_io;
1628
1629 handle = ext4_journal_current_handle();
1630 BUG_ON(!handle);
1631
1632 /*
1633 * Call ext4_map_blocks() to allocate any delayed allocation
1634 * blocks, or to convert an uninitialized extent to be
1635 * initialized (in the case where we have written into
1636 * one or more preallocated blocks).
1637 *
1638 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
1639 * indicate that we are on the delayed allocation path. This
1640 * affects functions in many different parts of the allocation
1641 * call path. This flag exists primarily because we don't
1642 * want to change *many* call functions, so ext4_map_blocks()
1643 * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
1644 * inode's allocation semaphore is taken.
1645 *
1646 * If the blocks in questions were delalloc blocks, set
1647 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
1648 * variables are updated after the blocks have been allocated.
1649 */
1650 map.m_lblk = next;
1651 map.m_len = max_blocks;
1652 /*
1653 * We're in delalloc path and it is possible that we're going to
1654 * need more metadata blocks than previously reserved. However
1655 * we must not fail because we're in writeback and there is
1656 * nothing we can do about it so it might result in data loss.
1657 * So use reserved blocks to allocate metadata if possible.
1658 */
1659 get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
1660 EXT4_GET_BLOCKS_METADATA_NOFAIL;
1661 if (ext4_should_dioread_nolock(mpd->inode))
1662 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
1663 if (mpd->b_state & (1 << BH_Delay))
1664 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
1665
1666
1667 blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
1668 if (blks < 0) {
1669 struct super_block *sb = mpd->inode->i_sb;
1670
1671 err = blks;
1672 /*
1673 * If get block returns EAGAIN or ENOSPC and there
1674 * appears to be free blocks we will just let
1675 * mpage_da_submit_io() unlock all of the pages.
1676 */
1677 if (err == -EAGAIN)
1678 goto submit_io;
1679
1680 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1681 mpd->retval = err;
1682 goto submit_io;
1683 }
1684
1685 /*
1686 * get block failure will cause us to loop in
1687 * writepages, because a_ops->writepage won't be able
1688 * to make progress. The page will be redirtied by
1689 * writepage and writepages will again try to write
1690 * the same.
1691 */
1692 if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
1693 ext4_msg(sb, KERN_CRIT,
1694 "delayed block allocation failed for inode %lu "
1695 "at logical offset %llu with max blocks %zd "
1696 "with error %d", mpd->inode->i_ino,
1697 (unsigned long long) next,
1698 mpd->b_size >> mpd->inode->i_blkbits, err);
1699 ext4_msg(sb, KERN_CRIT,
1700 "This should not happen!! Data will be lost");
1701 if (err == -ENOSPC)
1702 ext4_print_free_blocks(mpd->inode);
1703 }
1704 /* invalidate all the pages */
1705 ext4_da_block_invalidatepages(mpd);
1706
1707 /* Mark this page range as having been completed */
1708 mpd->io_done = 1;
1709 return;
1710 }
1711 BUG_ON(blks == 0);
1712
1713 mapp = &map;
1714 if (map.m_flags & EXT4_MAP_NEW) {
1715 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
1716 int i;
1717
1718 for (i = 0; i < map.m_len; i++)
1719 unmap_underlying_metadata(bdev, map.m_pblk + i);
1720 }
1721
1722 /*
1723 * Update on-disk size along with block allocation.
1724 */
1725 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
1726 if (disksize > i_size_read(mpd->inode))
1727 disksize = i_size_read(mpd->inode);
1728 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
1729 ext4_update_i_disksize(mpd->inode, disksize);
1730 err = ext4_mark_inode_dirty(handle, mpd->inode);
1731 if (err)
1732 ext4_error(mpd->inode->i_sb,
1733 "Failed to mark inode %lu dirty",
1734 mpd->inode->i_ino);
1735 }
1736
1737submit_io:
1738 mpage_da_submit_io(mpd, mapp);
1739 mpd->io_done = 1;
1740}
1741
1742#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
1743 (1 << BH_Delay) | (1 << BH_Unwritten))
1744
1745/*
1746 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
1747 *
1748 * @mpd->lbh - extent of blocks
1749 * @logical - logical number of the block in the file
1750 * @b_state - b_state of the buffer head added
1751 *
1752 * the function is used to collect contig. blocks in same state
1753 */
1754static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
1755 unsigned long b_state)
1756{
1757 sector_t next;
1758 int blkbits = mpd->inode->i_blkbits;
1759 int nrblocks = mpd->b_size >> blkbits;
1760
1761 /*
1762 * XXX Don't go larger than mballoc is willing to allocate
1763 * This is a stopgap solution. We eventually need to fold
1764 * mpage_da_submit_io() into this function and then call
1765 * ext4_map_blocks() multiple times in a loop
1766 */
1767 if (nrblocks >= (8*1024*1024 >> blkbits))
1768 goto flush_it;
1769
1770 /* check if the reserved journal credits might overflow */
1771 if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
1772 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1773 /*
1774 * With non-extent format we are limited by the journal
1775 * credit available. Total credit needed to insert
1776 * nrblocks contiguous blocks is dependent on the
1777 * nrblocks. So limit nrblocks.
1778 */
1779 goto flush_it;
1780 }
1781 }
1782 /*
1783 * First block in the extent
1784 */
1785 if (mpd->b_size == 0) {
1786 mpd->b_blocknr = logical;
1787 mpd->b_size = 1 << blkbits;
1788 mpd->b_state = b_state & BH_FLAGS;
1789 return;
1790 }
1791
1792 next = mpd->b_blocknr + nrblocks;
1793 /*
1794 * Can we merge the block to our big extent?
1795 */
1796 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
1797 mpd->b_size += 1 << blkbits;
1798 return;
1799 }
1800
1801flush_it:
1802 /*
1803 * We couldn't merge the block to our extent, so we
1804 * need to flush current extent and start new one
1805 */
1806 mpage_da_map_and_submit(mpd);
1807 return;
1808}
1809
1810static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) 1495static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1811{ 1496{
1812 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); 1497 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
@@ -2204,6 +1889,8 @@ static int ext4_writepage(struct page *page,
2204 return ret; 1889 return ret;
2205} 1890}
2206 1891
1892#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1893
2207/* 1894/*
2208 * mballoc gives us at most this number of blocks... 1895 * mballoc gives us at most this number of blocks...
2209 * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 1896 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
@@ -2212,6 +1899,315 @@ static int ext4_writepage(struct page *page,
2212#define MAX_WRITEPAGES_EXTENT_LEN 2048 1899#define MAX_WRITEPAGES_EXTENT_LEN 2048
2213 1900
2214/* 1901/*
1902 * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
1903 *
1904 * @mpd - extent of blocks
1905 * @lblk - logical number of the block in the file
1906 * @b_state - b_state of the buffer head added
1907 *
1908 * the function is used to collect contig. blocks in same state
1909 */
1910static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1911 unsigned long b_state)
1912{
1913 struct ext4_map_blocks *map = &mpd->map;
1914
1915 /* Don't go larger than mballoc is willing to allocate */
1916 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
1917 return 0;
1918
1919 /* First block in the extent? */
1920 if (map->m_len == 0) {
1921 map->m_lblk = lblk;
1922 map->m_len = 1;
1923 map->m_flags = b_state & BH_FLAGS;
1924 return 1;
1925 }
1926
1927 /* Can we merge the block to our big extent? */
1928 if (lblk == map->m_lblk + map->m_len &&
1929 (b_state & BH_FLAGS) == map->m_flags) {
1930 map->m_len++;
1931 return 1;
1932 }
1933 return 0;
1934}
1935
1936static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
1937 struct buffer_head *head,
1938 struct buffer_head *bh,
1939 ext4_lblk_t lblk)
1940{
1941 struct inode *inode = mpd->inode;
1942 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1943 >> inode->i_blkbits;
1944
1945 do {
1946 BUG_ON(buffer_locked(bh));
1947
1948 if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
1949 (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
1950 lblk >= blocks) {
1951 /* Found extent to map? */
1952 if (mpd->map.m_len)
1953 return false;
1954 if (lblk >= blocks)
1955 return true;
1956 continue;
1957 }
1958 if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
1959 return false;
1960 } while (lblk++, (bh = bh->b_this_page) != head);
1961 return true;
1962}
1963
1964static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1965{
1966 int len;
1967 loff_t size = i_size_read(mpd->inode);
1968 int err;
1969
1970 BUG_ON(page->index != mpd->first_page);
1971 if (page->index == size >> PAGE_CACHE_SHIFT)
1972 len = size & ~PAGE_CACHE_MASK;
1973 else
1974 len = PAGE_CACHE_SIZE;
1975 clear_page_dirty_for_io(page);
1976 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1977 if (!err)
1978 mpd->wbc->nr_to_write--;
1979 mpd->first_page++;
1980
1981 return err;
1982}
1983
1984/*
1985 * mpage_map_buffers - update buffers corresponding to changed extent and
1986 * submit fully mapped pages for IO
1987 *
1988 * @mpd - description of extent to map, on return next extent to map
1989 *
1990 * Scan buffers corresponding to changed extent (we expect corresponding pages
1991 * to be already locked) and update buffer state according to new extent state.
1992 * We map delalloc buffers to their physical location, clear unwritten bits,
1993 * and mark buffers as uninit when we perform writes to uninitialized extents
1994 * and do extent conversion after IO is finished. If the last page is not fully
1995 * mapped, we update @map to the next extent in the last page that needs
1996 * mapping. Otherwise we submit the page for IO.
1997 */
1998static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
1999{
2000 struct pagevec pvec;
2001 int nr_pages, i;
2002 struct inode *inode = mpd->inode;
2003 struct buffer_head *head, *bh;
2004 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2005 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
2006 >> inode->i_blkbits;
2007 pgoff_t start, end;
2008 ext4_lblk_t lblk;
2009 sector_t pblock;
2010 int err;
2011
2012 start = mpd->map.m_lblk >> bpp_bits;
2013 end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
2014 lblk = start << bpp_bits;
2015 pblock = mpd->map.m_pblk;
2016
2017 pagevec_init(&pvec, 0);
2018 while (start <= end) {
2019 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
2020 PAGEVEC_SIZE);
2021 if (nr_pages == 0)
2022 break;
2023 for (i = 0; i < nr_pages; i++) {
2024 struct page *page = pvec.pages[i];
2025
2026 if (page->index > end)
2027 break;
2028 /* Upto 'end' pages must be contiguous */
2029 BUG_ON(page->index != start);
2030 bh = head = page_buffers(page);
2031 do {
2032 if (lblk < mpd->map.m_lblk)
2033 continue;
2034 if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
2035 /*
2036 * Buffer after end of mapped extent.
2037 * Find next buffer in the page to map.
2038 */
2039 mpd->map.m_len = 0;
2040 mpd->map.m_flags = 0;
2041 add_page_bufs_to_extent(mpd, head, bh,
2042 lblk);
2043 pagevec_release(&pvec);
2044 return 0;
2045 }
2046 if (buffer_delay(bh)) {
2047 clear_buffer_delay(bh);
2048 bh->b_blocknr = pblock++;
2049 }
2050 if (mpd->map.m_flags & EXT4_MAP_UNINIT)
2051 set_buffer_uninit(bh);
2052 clear_buffer_unwritten(bh);
2053 } while (++lblk < blocks &&
2054 (bh = bh->b_this_page) != head);
2055
2056 /*
2057 * FIXME: This is going to break if dioread_nolock
2058 * supports blocksize < pagesize as we will try to
2059 * convert potentially unmapped parts of inode.
2060 */
2061 mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
2062 /* Page fully mapped - let IO run! */
2063 err = mpage_submit_page(mpd, page);
2064 if (err < 0) {
2065 pagevec_release(&pvec);
2066 return err;
2067 }
2068 start++;
2069 }
2070 pagevec_release(&pvec);
2071 }
2072 /* Extent fully mapped and matches with page boundary. We are done. */
2073 mpd->map.m_len = 0;
2074 mpd->map.m_flags = 0;
2075 return 0;
2076}
2077
2078static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2079{
2080 struct inode *inode = mpd->inode;
2081 struct ext4_map_blocks *map = &mpd->map;
2082 int get_blocks_flags;
2083 int err;
2084
2085 trace_ext4_da_write_pages_extent(inode, map);
2086 /*
2087 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
2088 * to convert an uninitialized extent to be initialized (in the case
2089 * where we have written into one or more preallocated blocks). It is
2090 * possible that we're going to need more metadata blocks than
2091 * previously reserved. However we must not fail because we're in
2092 * writeback and there is nothing we can do about it so it might result
2093 * in data loss. So use reserved blocks to allocate metadata if
2094 * possible.
2095 *
2096 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
2097 * in question are delalloc blocks. This affects functions in many
2098 * different parts of the allocation call path. This flag exists
2099 * primarily because we don't want to change *many* call functions, so
2100 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
2101 * once the inode's allocation semaphore is taken.
2102 */
2103 get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
2104 EXT4_GET_BLOCKS_METADATA_NOFAIL;
2105 if (ext4_should_dioread_nolock(inode))
2106 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2107 if (map->m_flags & (1 << BH_Delay))
2108 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2109
2110 err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
2111 if (err < 0)
2112 return err;
2113
2114 BUG_ON(map->m_len == 0);
2115 if (map->m_flags & EXT4_MAP_NEW) {
2116 struct block_device *bdev = inode->i_sb->s_bdev;
2117 int i;
2118
2119 for (i = 0; i < map->m_len; i++)
2120 unmap_underlying_metadata(bdev, map->m_pblk + i);
2121 }
2122 return 0;
2123}
2124
2125/*
2126 * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
2127 * mpd->len and submit pages underlying it for IO
2128 *
2129 * @handle - handle for journal operations
2130 * @mpd - extent to map
2131 *
2132 * The function maps extent starting at mpd->lblk of length mpd->len. If it is
2133 * delayed, blocks are allocated, if it is unwritten, we may need to convert
2134 * them to initialized or split the described range from larger unwritten
2135 * extent. Note that we need not map all the described range since allocation
2136 * can return less blocks or the range is covered by more unwritten extents. We
2137 * cannot map more because we are limited by reserved transaction credits. On
2138 * the other hand we always make sure that the last touched page is fully
2139 * mapped so that it can be written out (and thus forward progress is
2140 * guaranteed). After mapping we submit all mapped pages for IO.
2141 */
2142static int mpage_map_and_submit_extent(handle_t *handle,
2143 struct mpage_da_data *mpd)
2144{
2145 struct inode *inode = mpd->inode;
2146 struct ext4_map_blocks *map = &mpd->map;
2147 int err;
2148 loff_t disksize;
2149
2150 mpd->io_submit.io_end->offset =
2151 ((loff_t)map->m_lblk) << inode->i_blkbits;
2152 while (map->m_len) {
2153 err = mpage_map_one_extent(handle, mpd);
2154 if (err < 0) {
2155 struct super_block *sb = inode->i_sb;
2156
2157 /*
2158 * Need to commit transaction to free blocks. Let upper
2159 * layers sort it out.
2160 */
2161 if (err == -ENOSPC && ext4_count_free_clusters(sb))
2162 return -ENOSPC;
2163
2164 if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
2165 ext4_msg(sb, KERN_CRIT,
2166 "Delayed block allocation failed for "
2167 "inode %lu at logical offset %llu with"
2168 " max blocks %u with error %d",
2169 inode->i_ino,
2170 (unsigned long long)map->m_lblk,
2171 (unsigned)map->m_len, err);
2172 ext4_msg(sb, KERN_CRIT,
2173 "This should not happen!! Data will "
2174 "be lost\n");
2175 if (err == -ENOSPC)
2176 ext4_print_free_blocks(inode);
2177 }
2178 /* invalidate all the pages */
2179 mpage_release_unused_pages(mpd, true);
2180 return err;
2181 }
2182 /*
2183 * Update buffer state, submit mapped pages, and get us new
2184 * extent to map
2185 */
2186 err = mpage_map_and_submit_buffers(mpd);
2187 if (err < 0)
2188 return err;
2189 }
2190
2191 /* Update on-disk size after IO is submitted */
2192 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2193 if (disksize > i_size_read(inode))
2194 disksize = i_size_read(inode);
2195 if (disksize > EXT4_I(inode)->i_disksize) {
2196 int err2;
2197
2198 ext4_update_i_disksize(inode, disksize);
2199 err2 = ext4_mark_inode_dirty(handle, inode);
2200 if (err2)
2201 ext4_error(inode->i_sb,
2202 "Failed to mark inode %lu dirty",
2203 inode->i_ino);
2204 if (!err)
2205 err = err2;
2206 }
2207 return err;
2208}
2209
2210/*
2215 * Calculate the total number of credits to reserve for one writepages 2211 * Calculate the total number of credits to reserve for one writepages
2216 * iteration. This is called from ext4_da_writepages(). We map an extent of 2212 * iteration. This is called from ext4_da_writepages(). We map an extent of
2217 * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 2213 * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
@@ -2227,44 +2223,49 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2227} 2223}
2228 2224
2229/* 2225/*
2230 * write_cache_pages_da - walk the list of dirty pages of the given 2226 * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
2231 * address space and accumulate pages that need writing, and call 2227 * and underlying extent to map
2232 * mpage_da_map_and_submit to map a single contiguous memory region 2228 *
2233 * and then write them. 2229 * @mpd - where to look for pages
2230 *
2231 * Walk dirty pages in the mapping. If they are fully mapped, submit them for
2232 * IO immediately. When we find a page which isn't mapped we start accumulating
2233 * extent of buffers underlying these pages that needs mapping (formed by
2234 * either delayed or unwritten buffers). We also lock the pages containing
2235 * these buffers. The extent found is returned in @mpd structure (starting at
2236 * mpd->lblk with length mpd->len blocks).
2237 *
2238 * Note that this function can attach bios to one io_end structure which are
2239 * neither logically nor physically contiguous. Although it may seem as an
2240 * unnecessary complication, it is actually inevitable in blocksize < pagesize
2241 * case as we need to track IO to all buffers underlying a page in one io_end.
2234 */ 2242 */
2235static int write_cache_pages_da(handle_t *handle, 2243static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2236 struct address_space *mapping,
2237 struct writeback_control *wbc,
2238 struct mpage_da_data *mpd,
2239 pgoff_t *done_index)
2240{ 2244{
2241 struct buffer_head *bh, *head; 2245 struct address_space *mapping = mpd->inode->i_mapping;
2242 struct inode *inode = mapping->host; 2246 struct pagevec pvec;
2243 struct pagevec pvec; 2247 unsigned int nr_pages;
2244 unsigned int nr_pages; 2248 pgoff_t index = mpd->first_page;
2245 sector_t logical; 2249 pgoff_t end = mpd->last_page;
2246 pgoff_t index, end; 2250 int tag;
2247 long nr_to_write = wbc->nr_to_write; 2251 int i, err = 0;
2248 int i, tag, ret = 0; 2252 int blkbits = mpd->inode->i_blkbits;
2249 2253 ext4_lblk_t lblk;
2250 memset(mpd, 0, sizeof(struct mpage_da_data)); 2254 struct buffer_head *head;
2251 mpd->wbc = wbc;
2252 mpd->inode = inode;
2253 pagevec_init(&pvec, 0);
2254 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2255 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2256 2255
2257 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2256 if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
2258 tag = PAGECACHE_TAG_TOWRITE; 2257 tag = PAGECACHE_TAG_TOWRITE;
2259 else 2258 else
2260 tag = PAGECACHE_TAG_DIRTY; 2259 tag = PAGECACHE_TAG_DIRTY;
2261 2260
2262 *done_index = index; 2261 pagevec_init(&pvec, 0);
2262 mpd->map.m_len = 0;
2263 mpd->next_page = index;
2263 while (index <= end) { 2264 while (index <= end) {
2264 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, 2265 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2265 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2266 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2266 if (nr_pages == 0) 2267 if (nr_pages == 0)
2267 return 0; 2268 goto out;
2268 2269
2269 for (i = 0; i < nr_pages; i++) { 2270 for (i = 0; i < nr_pages; i++) {
2270 struct page *page = pvec.pages[i]; 2271 struct page *page = pvec.pages[i];
@@ -2279,31 +2280,21 @@ static int write_cache_pages_da(handle_t *handle,
2279 if (page->index > end) 2280 if (page->index > end)
2280 goto out; 2281 goto out;
2281 2282
2282 *done_index = page->index + 1; 2283 /* If we can't merge this page, we are done. */
2283 2284 if (mpd->map.m_len > 0 && mpd->next_page != page->index)
2284 /* 2285 goto out;
2285 * If we can't merge this page, and we have
2286 * accumulated an contiguous region, write it
2287 */
2288 if ((mpd->next_page != page->index) &&
2289 (mpd->next_page != mpd->first_page)) {
2290 mpage_da_map_and_submit(mpd);
2291 goto ret_extent_tail;
2292 }
2293 2286
2294 lock_page(page); 2287 lock_page(page);
2295
2296 /* 2288 /*
2297 * If the page is no longer dirty, or its 2289 * If the page is no longer dirty, or its mapping no
2298 * mapping no longer corresponds to inode we 2290 * longer corresponds to inode we are writing (which
2299 * are writing (which means it has been 2291 * means it has been truncated or invalidated), or the
2300 * truncated or invalidated), or the page is 2292 * page is already under writeback and we are not doing
2301 * already under writeback and we are not 2293 * a data integrity writeback, skip the page
2302 * doing a data integrity writeback, skip the page
2303 */ 2294 */
2304 if (!PageDirty(page) || 2295 if (!PageDirty(page) ||
2305 (PageWriteback(page) && 2296 (PageWriteback(page) &&
2306 (wbc->sync_mode == WB_SYNC_NONE)) || 2297 (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
2307 unlikely(page->mapping != mapping)) { 2298 unlikely(page->mapping != mapping)) {
2308 unlock_page(page); 2299 unlock_page(page);
2309 continue; 2300 continue;
@@ -2312,101 +2303,57 @@ static int write_cache_pages_da(handle_t *handle,
2312 wait_on_page_writeback(page); 2303 wait_on_page_writeback(page);
2313 BUG_ON(PageWriteback(page)); 2304 BUG_ON(PageWriteback(page));
2314 2305
2315 /* 2306 if (mpd->map.m_len == 0)
2316 * If we have inline data and arrive here, it means that
2317 * we will soon create the block for the 1st page, so
2318 * we'd better clear the inline data here.
2319 */
2320 if (ext4_has_inline_data(inode)) {
2321 BUG_ON(ext4_test_inode_state(inode,
2322 EXT4_STATE_MAY_INLINE_DATA));
2323 ext4_destroy_inline_data(handle, inode);
2324 }
2325
2326 if (mpd->next_page != page->index)
2327 mpd->first_page = page->index; 2307 mpd->first_page = page->index;
2328 mpd->next_page = page->index + 1; 2308 mpd->next_page = page->index + 1;
2329 logical = (sector_t) page->index <<
2330 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2331
2332 /* Add all dirty buffers to mpd */ 2309 /* Add all dirty buffers to mpd */
2310 lblk = ((ext4_lblk_t)page->index) <<
2311 (PAGE_CACHE_SHIFT - blkbits);
2333 head = page_buffers(page); 2312 head = page_buffers(page);
2334 bh = head; 2313 if (!add_page_bufs_to_extent(mpd, head, head, lblk))
2335 do { 2314 goto out;
2336 BUG_ON(buffer_locked(bh)); 2315 /* So far everything mapped? Submit the page for IO. */
2337 /* 2316 if (mpd->map.m_len == 0) {
2338 * We need to try to allocate unmapped blocks 2317 err = mpage_submit_page(mpd, page);
2339 * in the same page. Otherwise we won't make 2318 if (err < 0)
2340 * progress with the page in ext4_writepage
2341 */
2342 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2343 mpage_add_bh_to_extent(mpd, logical,
2344 bh->b_state);
2345 if (mpd->io_done)
2346 goto ret_extent_tail;
2347 } else if (buffer_dirty(bh) &&
2348 buffer_mapped(bh)) {
2349 /*
2350 * mapped dirty buffer. We need to
2351 * update the b_state because we look
2352 * at b_state in mpage_da_map_blocks.
2353 * We don't update b_size because if we
2354 * find an unmapped buffer_head later
2355 * we need to use the b_state flag of
2356 * that buffer_head.
2357 */
2358 if (mpd->b_size == 0)
2359 mpd->b_state =
2360 bh->b_state & BH_FLAGS;
2361 }
2362 logical++;
2363 } while ((bh = bh->b_this_page) != head);
2364
2365 if (nr_to_write > 0) {
2366 nr_to_write--;
2367 if (nr_to_write == 0 &&
2368 wbc->sync_mode == WB_SYNC_NONE)
2369 /*
2370 * We stop writing back only if we are
2371 * not doing integrity sync. In case of
2372 * integrity sync we have to keep going
2373 * because someone may be concurrently
2374 * dirtying pages, and we might have
2375 * synced a lot of newly appeared dirty
2376 * pages, but have not synced all of the
2377 * old dirty pages.
2378 */
2379 goto out; 2319 goto out;
2380 } 2320 }
2321
2322 /*
2323 * Accumulated enough dirty pages? This doesn't apply
2324 * to WB_SYNC_ALL mode. For integrity sync we have to
2325 * keep going because someone may be concurrently
2326 * dirtying pages, and we might have synced a lot of
2327 * newly appeared dirty pages, but have not synced all
2328 * of the old dirty pages.
2329 */
2330 if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
2331 mpd->next_page - mpd->first_page >=
2332 mpd->wbc->nr_to_write)
2333 goto out;
2381 } 2334 }
2382 pagevec_release(&pvec); 2335 pagevec_release(&pvec);
2383 cond_resched(); 2336 cond_resched();
2384 } 2337 }
2385 return 0; 2338 return 0;
2386ret_extent_tail:
2387 ret = MPAGE_DA_EXTENT_TAIL;
2388out: 2339out:
2389 pagevec_release(&pvec); 2340 pagevec_release(&pvec);
2390 cond_resched(); 2341 return err;
2391 return ret;
2392} 2342}
2393 2343
2394
2395static int ext4_da_writepages(struct address_space *mapping, 2344static int ext4_da_writepages(struct address_space *mapping,
2396 struct writeback_control *wbc) 2345 struct writeback_control *wbc)
2397{ 2346{
2398 pgoff_t index; 2347 pgoff_t writeback_index = 0;
2348 long nr_to_write = wbc->nr_to_write;
2399 int range_whole = 0; 2349 int range_whole = 0;
2350 int cycled = 1;
2400 handle_t *handle = NULL; 2351 handle_t *handle = NULL;
2401 struct mpage_da_data mpd; 2352 struct mpage_da_data mpd;
2402 struct inode *inode = mapping->host; 2353 struct inode *inode = mapping->host;
2403 int pages_written = 0;
2404 int range_cyclic, cycled = 1, io_done = 0;
2405 int needed_blocks, ret = 0; 2354 int needed_blocks, ret = 0;
2406 loff_t range_start = wbc->range_start;
2407 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2355 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2408 pgoff_t done_index = 0; 2356 bool done;
2409 pgoff_t end;
2410 struct blk_plug plug; 2357 struct blk_plug plug;
2411 2358
2412 trace_ext4_da_writepages(inode, wbc); 2359 trace_ext4_da_writepages(inode, wbc);
@@ -2432,40 +2379,65 @@ static int ext4_da_writepages(struct address_space *mapping,
2432 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) 2379 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2433 return -EROFS; 2380 return -EROFS;
2434 2381
2382 /*
2383 * If we have inline data and arrive here, it means that
2384 * we will soon create the block for the 1st page, so
2385 * we'd better clear the inline data here.
2386 */
2387 if (ext4_has_inline_data(inode)) {
2388 /* Just inode will be modified... */
2389 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
2390 if (IS_ERR(handle)) {
2391 ret = PTR_ERR(handle);
2392 goto out_writepages;
2393 }
2394 BUG_ON(ext4_test_inode_state(inode,
2395 EXT4_STATE_MAY_INLINE_DATA));
2396 ext4_destroy_inline_data(handle, inode);
2397 ext4_journal_stop(handle);
2398 }
2399
2435 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2400 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2436 range_whole = 1; 2401 range_whole = 1;
2437 2402
2438 range_cyclic = wbc->range_cyclic;
2439 if (wbc->range_cyclic) { 2403 if (wbc->range_cyclic) {
2440 index = mapping->writeback_index; 2404 writeback_index = mapping->writeback_index;
2441 if (index) 2405 if (writeback_index)
2442 cycled = 0; 2406 cycled = 0;
2443 wbc->range_start = index << PAGE_CACHE_SHIFT; 2407 mpd.first_page = writeback_index;
2444 wbc->range_end = LLONG_MAX; 2408 mpd.last_page = -1;
2445 wbc->range_cyclic = 0;
2446 end = -1;
2447 } else { 2409 } else {
2448 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2410 mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
2449 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2411 mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
2450 } 2412 }
2451 2413
2414 mpd.inode = inode;
2415 mpd.wbc = wbc;
2416 ext4_io_submit_init(&mpd.io_submit, wbc);
2452retry: 2417retry:
2453 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2418 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2454 tag_pages_for_writeback(mapping, index, end); 2419 tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
2455 2420 done = false;
2456 blk_start_plug(&plug); 2421 blk_start_plug(&plug);
2457 while (!ret && wbc->nr_to_write > 0) { 2422 while (!done && mpd.first_page <= mpd.last_page) {
2423 /* For each extent of pages we use new io_end */
2424 mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
2425 if (!mpd.io_submit.io_end) {
2426 ret = -ENOMEM;
2427 break;
2428 }
2458 2429
2459 /* 2430 /*
2460 * we insert one extent at a time. So we need 2431 * We have two constraints: We find one extent to map and we
2461 * credit needed for single extent allocation. 2432 * must always write out whole page (makes a difference when
2462 * journalled mode is currently not supported 2433 * blocksize < pagesize) so that we don't block on IO when we
2463 * by delalloc 2434 * try to write out the rest of the page. Journalled mode is
2435 * not supported by delalloc.
2464 */ 2436 */
2465 BUG_ON(ext4_should_journal_data(inode)); 2437 BUG_ON(ext4_should_journal_data(inode));
2466 needed_blocks = ext4_da_writepages_trans_blocks(inode); 2438 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2467 2439
2468 /* start a new transaction*/ 2440 /* start a new transaction */
2469 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 2441 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2470 needed_blocks); 2442 needed_blocks);
2471 if (IS_ERR(handle)) { 2443 if (IS_ERR(handle)) {
@@ -2473,76 +2445,67 @@ retry:
2473 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2445 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
2474 "%ld pages, ino %lu; err %d", __func__, 2446 "%ld pages, ino %lu; err %d", __func__,
2475 wbc->nr_to_write, inode->i_ino, ret); 2447 wbc->nr_to_write, inode->i_ino, ret);
2476 blk_finish_plug(&plug); 2448 /* Release allocated io_end */
2477 goto out_writepages; 2449 ext4_put_io_end(mpd.io_submit.io_end);
2450 break;
2478 } 2451 }
2479 2452
2480 /* 2453 trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
2481 * Now call write_cache_pages_da() to find the next 2454 ret = mpage_prepare_extent_to_map(&mpd);
2482 * contiguous region of logical blocks that need 2455 if (!ret) {
2483 * blocks to be allocated by ext4 and submit them. 2456 if (mpd.map.m_len)
2484 */ 2457 ret = mpage_map_and_submit_extent(handle, &mpd);
2485 ret = write_cache_pages_da(handle, mapping, 2458 else {
2486 wbc, &mpd, &done_index); 2459 /*
2487 /* 2460 * We scanned the whole range (or exhausted
2488 * If we have a contiguous extent of pages and we 2461 * nr_to_write), submitted what was mapped and
2489 * haven't done the I/O yet, map the blocks and submit 2462 * didn't find anything needing mapping. We are
2490 * them for I/O. 2463 * done.
2491 */ 2464 */
2492 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 2465 done = true;
2493 mpage_da_map_and_submit(&mpd); 2466 }
2494 ret = MPAGE_DA_EXTENT_TAIL;
2495 } 2467 }
2496 trace_ext4_da_write_pages(inode, &mpd);
2497 wbc->nr_to_write -= mpd.pages_written;
2498
2499 ext4_journal_stop(handle); 2468 ext4_journal_stop(handle);
2500 2469 /* Submit prepared bio */
2501 if ((mpd.retval == -ENOSPC) && sbi->s_journal) { 2470 ext4_io_submit(&mpd.io_submit);
2502 /* commit the transaction which would 2471 /* Unlock pages we didn't use */
2472 mpage_release_unused_pages(&mpd, false);
2473 /* Drop our io_end reference we got from init */
2474 ext4_put_io_end(mpd.io_submit.io_end);
2475
2476 if (ret == -ENOSPC && sbi->s_journal) {
2477 /*
2478 * Commit the transaction which would
2503 * free blocks released in the transaction 2479 * free blocks released in the transaction
2504 * and try again 2480 * and try again
2505 */ 2481 */
2506 jbd2_journal_force_commit_nested(sbi->s_journal); 2482 jbd2_journal_force_commit_nested(sbi->s_journal);
2507 ret = 0; 2483 ret = 0;
2508 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 2484 continue;
2509 /* 2485 }
2510 * Got one extent now try with rest of the pages. 2486 /* Fatal error - ENOMEM, EIO... */
2511 * If mpd.retval is set -EIO, journal is aborted. 2487 if (ret)
2512 * So we don't need to write any more.
2513 */
2514 pages_written += mpd.pages_written;
2515 ret = mpd.retval;
2516 io_done = 1;
2517 } else if (wbc->nr_to_write)
2518 /*
2519 * There is no more writeout needed
2520 * or we requested for a noblocking writeout
2521 * and we found the device congested
2522 */
2523 break; 2488 break;
2524 } 2489 }
2525 blk_finish_plug(&plug); 2490 blk_finish_plug(&plug);
2526 if (!io_done && !cycled) { 2491 if (!ret && !cycled) {
2527 cycled = 1; 2492 cycled = 1;
2528 index = 0; 2493 mpd.last_page = writeback_index - 1;
2529 wbc->range_start = index << PAGE_CACHE_SHIFT; 2494 mpd.first_page = 0;
2530 wbc->range_end = mapping->writeback_index - 1;
2531 goto retry; 2495 goto retry;
2532 } 2496 }
2533 2497
2534 /* Update index */ 2498 /* Update index */
2535 wbc->range_cyclic = range_cyclic;
2536 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2499 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2537 /* 2500 /*
2538 * set the writeback_index so that range_cyclic 2501 * Set the writeback_index so that range_cyclic
2539 * mode will write it back later 2502 * mode will write it back later
2540 */ 2503 */
2541 mapping->writeback_index = done_index; 2504 mapping->writeback_index = mpd.first_page;
2542 2505
2543out_writepages: 2506out_writepages:
2544 wbc->range_start = range_start; 2507 trace_ext4_da_writepages_result(inode, wbc, ret,
2545 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2508 nr_to_write - wbc->nr_to_write);
2546 return ret; 2509 return ret;
2547} 2510}
2548 2511
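
The merge rule used by the new mpage_add_bh_to_extent()/add_page_bufs_to_extent() pair above boils down to: start an extent at the first dirty delalloc or unwritten block, extend it only while blocks stay logically contiguous and in the same state, and never exceed the mballoc limit. A small stand-alone model of that rule (buffer state reduced to a plain flags word, kernel types dropped):

/* Stand-alone model of the extent merge rule in mpage_add_bh_to_extent()
 * above.  The patch uses the BH_Delay/BH_Unwritten bits for "flags";
 * everything else is dropped. */
#include <stdbool.h>

#define MAX_WRITEPAGES_EXTENT_LEN 2048   /* mballoc's per-call limit */

struct extent_acc {
	unsigned long lblk;     /* first logical block of the extent */
	unsigned int  len;      /* number of blocks, 0 = empty */
	unsigned int  flags;    /* delalloc/unwritten state of the run */
};

bool add_block_to_extent(struct extent_acc *ex, unsigned long lblk,
			 unsigned int flags)
{
	if (ex->len >= MAX_WRITEPAGES_EXTENT_LEN)
		return false;                    /* extent is as big as allowed */
	if (ex->len == 0) {                      /* first block starts the extent */
		ex->lblk  = lblk;
		ex->len   = 1;
		ex->flags = flags;
		return true;
	}
	if (lblk == ex->lblk + ex->len && flags == ex->flags) {
		ex->len++;                       /* contiguous and same state: merge */
		return true;
	}
	return false;                            /* caller must map what it has */
}

When the merge is refused, scanning stops, the accumulated extent is mapped by mpage_map_and_submit_extent(), and accumulation resumes from the rejected block on the next pass.
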
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..3e5854625126 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
360 bio->bi_bdev = bh->b_bdev; 360 bio->bi_bdev = bh->b_bdev;
361 bio->bi_end_io = ext4_end_bio; 361 bio->bi_end_io = ext4_end_bio;
362 bio->bi_private = ext4_get_io_end(io->io_end); 362 bio->bi_private = ext4_get_io_end(io->io_end);
363 if (!io->io_end->size)
364 io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
365 + bh_offset(bh);
366 io->io_bio = bio; 363 io->io_bio = bio;
367 io->io_next_block = bh->b_blocknr; 364 io->io_next_block = bh->b_blocknr;
368 return 0; 365 return 0;
@@ -390,7 +387,6 @@ submit_and_retry:
390 io_end = io->io_end; 387 io_end = io->io_end;
391 if (test_clear_buffer_uninit(bh)) 388 if (test_clear_buffer_uninit(bh))
392 ext4_set_io_unwritten_flag(inode, io_end); 389 ext4_set_io_unwritten_flag(inode, io_end);
393 io_end->size += bh->b_size;
394 io->io_next_block++; 390 io->io_next_block++;
395 return 0; 391 return 0;
396} 392}
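
The page-io.c hunks above drop the per-buffer updates of io_end->offset and io_end->size because, after this patch, the writeback path maintains them itself: mpage_map_and_submit_extent() records the offset once per extent and mpage_map_and_submit_buffers() grows the size by one page for every fully mapped page it submits. A minimal model of that accounting (loff_t replaced by a plain 64-bit integer; names are illustrative):

/* Model of the relocated io_end accounting (simplified). */
#include <stdint.h>

struct io_end_model {
	int64_t offset;          /* file offset covered by this io_end */
	int64_t size;            /* bytes submitted under this io_end */
};

/* Done once when an extent is chosen for mapping
 * (mpage_map_and_submit_extent() in the patch). */
void io_end_set_extent_start(struct io_end_model *io, uint64_t lblk,
			     unsigned int blkbits)
{
	io->offset = (int64_t)(lblk << blkbits);
}

/* Done for every fully mapped page handed to the bio
 * (mpage_map_and_submit_buffers() in the patch). */
void io_end_add_page(struct io_end_model *io, int64_t page_size)
{
	io->size += page_size;
}
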