about summary refs log tree commit diff stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author	Mingming Cao <cmm@us.ibm.com>	2008-07-14 17:52:37 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2008-07-14 17:52:37 -0400
commit	d2a1763791a634e315ec926b62829c1e88842c86 (patch)
tree	75a701a89829ba7d728fdc19c30aa2794b9706b9 /fs/ext4/inode.c
parent	e8ced39d5e8911c662d4d69a342b9d053eaaac4e (diff)
ext4: delayed allocation ENOSPC handling
This patch does block reservation for delayed allocation, to avoid ENOSPC later at page flush time. Blocks (data and metadata) are reserved at da_write_begin() time, the free-blocks counter is updated by then, and the number of reserved blocks is stored in a per-inode counter. At writepage time, the unused reserved meta blocks are returned back. At unlink/truncate time, reserved blocks are properly released. Updated fix from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to fix the old-allocator block reservation accounting with delalloc; it also adds a lock to guard the counters and fixes the reservation for meta blocks. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c184
1 files changed, 156 insertions, 28 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2bef4f879e4b..a6b800c58474 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,7 @@
39#include "ext4_jbd2.h" 39#include "ext4_jbd2.h"
40#include "xattr.h" 40#include "xattr.h"
41#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h"
42 43
43static inline int ext4_begin_ordered_truncate(struct inode *inode, 44static inline int ext4_begin_ordered_truncate(struct inode *inode,
44 loff_t new_size) 45 loff_t new_size)
@@ -982,7 +983,7 @@ out:
982 */ 983 */
983int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 984int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
984 unsigned long max_blocks, struct buffer_head *bh, 985 unsigned long max_blocks, struct buffer_head *bh,
985 int create, int extend_disksize) 986 int create, int extend_disksize, int flag)
986{ 987{
987 int retval; 988 int retval;
988 989
@@ -1023,6 +1024,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1023 * with create == 1 flag. 1024 * with create == 1 flag.
1024 */ 1025 */
1025 down_write((&EXT4_I(inode)->i_data_sem)); 1026 down_write((&EXT4_I(inode)->i_data_sem));
1027
1028 /*
1029 * if the caller is from delayed allocation writeout path
1030 * we have already reserved fs blocks for allocation
1031 * let the underlying get_block() function know to
1032 * avoid double accounting
1033 */
1034 if (flag)
1035 EXT4_I(inode)->i_delalloc_reserved_flag = 1;
1026 /* 1036 /*
1027 * We need to check for EXT4 here because migrate 1037 * We need to check for EXT4 here because migrate
1028 * could have changed the inode type in between 1038 * could have changed the inode type in between
@@ -1044,6 +1054,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1044 ~EXT4_EXT_MIGRATE; 1054 ~EXT4_EXT_MIGRATE;
1045 } 1055 }
1046 } 1056 }
1057
1058 if (flag) {
1059 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1060 /*
1061 * Update reserved blocks/metadata blocks
1062 * after successful block allocation
1063 * which were deferred till now
1064 */
1065 if ((retval > 0) && buffer_delay(bh))
1066 ext4_da_release_space(inode, retval, 0);
1067 }
1068
1047 up_write((&EXT4_I(inode)->i_data_sem)); 1069 up_write((&EXT4_I(inode)->i_data_sem));
1048 return retval; 1070 return retval;
1049} 1071}
@@ -1069,7 +1091,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
1069 } 1091 }
1070 1092
1071 ret = ext4_get_blocks_wrap(handle, inode, iblock, 1093 ret = ext4_get_blocks_wrap(handle, inode, iblock,
1072 max_blocks, bh_result, create, 0); 1094 max_blocks, bh_result, create, 0, 0);
1073 if (ret > 0) { 1095 if (ret > 0) {
1074 bh_result->b_size = (ret << inode->i_blkbits); 1096 bh_result->b_size = (ret << inode->i_blkbits);
1075 ret = 0; 1097 ret = 0;
@@ -1095,7 +1117,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1095 dummy.b_blocknr = -1000; 1117 dummy.b_blocknr = -1000;
1096 buffer_trace_init(&dummy.b_history); 1118 buffer_trace_init(&dummy.b_history);
1097 err = ext4_get_blocks_wrap(handle, inode, block, 1, 1119 err = ext4_get_blocks_wrap(handle, inode, block, 1,
1098 &dummy, create, 1); 1120 &dummy, create, 1, 0);
1099 /* 1121 /*
1100 * ext4_get_blocks_handle() returns number of blocks 1122 * ext4_get_blocks_handle() returns number of blocks
1101 * mapped. 0 in case of a HOLE. 1123 * mapped. 0 in case of a HOLE.
@@ -1409,6 +1431,122 @@ static int ext4_journalled_write_end(struct file *file,
1409 1431
1410 return ret ? ret : copied; 1432 return ret ? ret : copied;
1411} 1433}
1434/*
1435 * Calculate the number of metadata blocks need to reserve
1436 * to allocate @blocks for non extent file based file
1437 */
1438static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1439{
1440 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1441 int ind_blks, dind_blks, tind_blks;
1442
1443 /* number of new indirect blocks needed */
1444 ind_blks = (blocks + icap - 1) / icap;
1445
1446 dind_blks = (ind_blks + icap - 1) / icap;
1447
1448 tind_blks = 1;
1449
1450 return ind_blks + dind_blks + tind_blks;
1451}
1452
1453/*
1454 * Calculate the number of metadata blocks need to reserve
1455 * to allocate given number of blocks
1456 */
1457static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1458{
1459 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1460 return ext4_ext_calc_metadata_amount(inode, blocks);
1461
1462 return ext4_indirect_calc_metadata_amount(inode, blocks);
1463}
1464
1465static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1466{
1467 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1468 unsigned long md_needed, mdblocks, total = 0;
1469
1470 /*
1471 * recalculate the amount of metadata blocks to reserve
1472 * in order to allocate nrblocks
1473 * worse case is one extent per block
1474 */
1475 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1476 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
1477 mdblocks = ext4_calc_metadata_amount(inode, total);
1478 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
1479
1480 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1481 total = md_needed + nrblocks;
1482
1483 if (ext4_has_free_blocks(sbi, total) < total) {
1484 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1485 return -ENOSPC;
1486 }
1487
1488 /* reduce fs free blocks counter */
1489 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1490
1491 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
1492 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
1493
1494 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1495 return 0; /* success */
1496}
1497
1498void ext4_da_release_space(struct inode *inode, int used, int to_free)
1499{
1500 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1501 int total, mdb, mdb_free, release;
1502
1503 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1504 /* recalculate the number of metablocks still need to be reserved */
1505 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
1506 mdb = ext4_calc_metadata_amount(inode, total);
1507
1508 /* figure out how many metablocks to release */
1509 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1510 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1511
1512 /* Account for allocated meta_blocks */
1513 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1514
1515 release = to_free + mdb_free;
1516
1517 /* update fs free blocks counter for truncate case */
1518 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1519
1520 /* update per-inode reservations */
1521 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
1522 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
1523
1524 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1525 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1526 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1527 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1528}
1529
1530static void ext4_da_page_release_reservation(struct page *page,
1531 unsigned long offset)
1532{
1533 int to_release = 0;
1534 struct buffer_head *head, *bh;
1535 unsigned int curr_off = 0;
1536
1537 head = page_buffers(page);
1538 bh = head;
1539 do {
1540 unsigned int next_off = curr_off + bh->b_size;
1541
1542 if ((offset <= curr_off) && (buffer_delay(bh))) {
1543 to_release++;
1544 clear_buffer_delay(bh);
1545 }
1546 curr_off = next_off;
1547 } while ((bh = bh->b_this_page) != head);
1548 ext4_da_release_space(page->mapping->host, 0, to_release);
1549}
1412 1550
1413/* 1551/*
1414 * Delayed allocation stuff 1552 * Delayed allocation stuff
@@ -1829,14 +1967,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1829 * preallocated blocks are unmapped but should treated 1967 * preallocated blocks are unmapped but should treated
1830 * the same as allocated blocks. 1968 * the same as allocated blocks.
1831 */ 1969 */
1832 ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0); 1970 ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
1833 if (ret == 0) { 1971 if ((ret == 0) && !buffer_delay(bh_result)) {
1834 /* the block isn't allocated yet, let's reserve space */ 1972 /* the block isn't (pre)allocated yet, let's reserve space */
1835 /* XXX: call reservation here */
1836 /* 1973 /*
1837 * XXX: __block_prepare_write() unmaps passed block, 1974 * XXX: __block_prepare_write() unmaps passed block,
1838 * is it OK? 1975 * is it OK?
1839 */ 1976 */
1977 ret = ext4_da_reserve_space(inode, 1);
1978 if (ret)
1979 /* not enough space to reserve */
1980 return ret;
1981
1840 map_bh(bh_result, inode->i_sb, 0); 1982 map_bh(bh_result, inode->i_sb, 0);
1841 set_buffer_new(bh_result); 1983 set_buffer_new(bh_result);
1842 set_buffer_delay(bh_result); 1984 set_buffer_delay(bh_result);
@@ -1847,7 +1989,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1847 1989
1848 return ret; 1990 return ret;
1849} 1991}
1850 1992#define EXT4_DELALLOC_RSVED 1
1851static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, 1993static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1852 struct buffer_head *bh_result, int create) 1994 struct buffer_head *bh_result, int create)
1853{ 1995{
@@ -1865,7 +2007,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1865 } 2007 }
1866 2008
1867 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, 2009 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
1868 bh_result, create, 0); 2010 bh_result, create, 0, EXT4_DELALLOC_RSVED);
1869 if (ret > 0) { 2011 if (ret > 0) {
1870 bh_result->b_size = (ret << inode->i_blkbits); 2012 bh_result->b_size = (ret << inode->i_blkbits);
1871 2013
@@ -1952,7 +2094,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
1952 loff_t pos, unsigned len, unsigned flags, 2094 loff_t pos, unsigned len, unsigned flags,
1953 struct page **pagep, void **fsdata) 2095 struct page **pagep, void **fsdata)
1954{ 2096{
1955 int ret; 2097 int ret, retries = 0;
1956 struct page *page; 2098 struct page *page;
1957 pgoff_t index; 2099 pgoff_t index;
1958 unsigned from, to; 2100 unsigned from, to;
@@ -1963,6 +2105,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
1963 from = pos & (PAGE_CACHE_SIZE - 1); 2105 from = pos & (PAGE_CACHE_SIZE - 1);
1964 to = from + len; 2106 to = from + len;
1965 2107
2108retry:
1966 /* 2109 /*
1967 * With delayed allocation, we don't log the i_disksize update 2110 * With delayed allocation, we don't log the i_disksize update
1968 * if there is delayed block allocation. But we still need 2111 * if there is delayed block allocation. But we still need
@@ -1988,6 +2131,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
1988 page_cache_release(page); 2131 page_cache_release(page);
1989 } 2132 }
1990 2133
2134 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2135 goto retry;
1991out: 2136out:
1992 return ret; 2137 return ret;
1993} 2138}
@@ -2040,9 +2185,6 @@ static int ext4_da_write_end(struct file *file,
2040 2185
2041static void ext4_da_invalidatepage(struct page *page, unsigned long offset) 2186static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
2042{ 2187{
2043 struct buffer_head *head, *bh;
2044 unsigned int curr_off = 0;
2045
2046 /* 2188 /*
2047 * Drop reserved blocks 2189 * Drop reserved blocks
2048 */ 2190 */
@@ -2050,21 +2192,7 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
2050 if (!page_has_buffers(page)) 2192 if (!page_has_buffers(page))
2051 goto out; 2193 goto out;
2052 2194
2053 head = page_buffers(page); 2195 ext4_da_page_release_reservation(page, offset);
2054 bh = head;
2055 do {
2056 unsigned int next_off = curr_off + bh->b_size;
2057
2058 /*
2059 * is this block fully invalidated?
2060 */
2061 if (offset <= curr_off && buffer_delay(bh)) {
2062 clear_buffer_delay(bh);
2063 /* XXX: add real stuff here */
2064 }
2065 curr_off = next_off;
2066 bh = bh->b_this_page;
2067 } while (bh != head);
2068 2196
2069out: 2197out:
2070 ext4_invalidatepage(page, offset); 2198 ext4_invalidatepage(page, offset);