author		Mingming Cao <cmm@us.ibm.com>	2008-07-14 17:52:37 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2008-07-14 17:52:37 -0400
commit		d2a1763791a634e315ec926b62829c1e88842c86 (patch)
tree		75a701a89829ba7d728fdc19c30aa2794b9706b9
parent		e8ced39d5e8911c662d4d69a342b9d053eaaac4e (diff)
ext4: delayed allocation ENOSPC handling
This patch does block reservation for delayed allocation, to avoid ENOSPC later at page flush time.

Blocks (data and metadata) are reserved at da_write_begin() time, the free-blocks counter is updated then, and the number of reserved blocks is stored in a per-inode counter.

At writepage time, any unused reserved metadata blocks are returned. At unlink/truncate time, reserved blocks are properly released.

Updated fix from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to correct the old-allocator block reservation accounting with delalloc, add a lock to guard the counters, and also fix the reservation for metadata blocks.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r--	fs/ext4/balloc.c	49
-rw-r--r--	fs/ext4/dir.c	3
-rw-r--r--	fs/ext4/ext4.h	6
-rw-r--r--	fs/ext4/ext4_extents.h	1
-rw-r--r--	fs/ext4/ext4_i.h	7
-rw-r--r--	fs/ext4/extents.c	32
-rw-r--r--	fs/ext4/inode.c	184
-rw-r--r--	fs/ext4/mballoc.c	20
-rw-r--r--	fs/ext4/super.c	5
9 files changed, 257 insertions(+), 50 deletions(-)
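The reservation scheme described in the commit message can be summarized with a small, self-contained userspace sketch (purely illustrative: the struct fields, helper names, and constants below are simplified stand-ins for the kernel code, and locking is omitted). At write_begin() time the inode reserves the data blocks plus a worst-case estimate of the metadata needed to map them, charging both against the filesystem's free-blocks counter; when writeback actually allocates blocks, the part of the metadata reservation that turned out to be unnecessary is returned.

/*
 * Minimal sketch of the delalloc reservation accounting this patch adds.
 * Assumptions: single-threaded, indirect-block mapping only, and all
 * names/values here are illustrative rather than the kernel API.
 */
#include <stdio.h>

#define ADDR_PER_BLOCK	1024		/* assumed block pointers per indirect block */

static long fs_free_blocks = 100000;	/* stand-in for s_freeblocks_counter */

struct inode_rsv {			/* stand-in for the new ext4_inode_info fields */
	long reserved_data_blocks;
	long reserved_meta_blocks;
	long allocated_meta_blocks;
};

/* worst-case metadata needed to map @blocks with indirect blocks */
static long calc_metadata_amount(long blocks)
{
	long ind = (blocks + ADDR_PER_BLOCK - 1) / ADDR_PER_BLOCK;
	long dind = (ind + ADDR_PER_BLOCK - 1) / ADDR_PER_BLOCK;

	return ind + dind + 1;		/* plus one triple-indirect block */
}

/* write_begin path: reserve data blocks plus the extra metadata they imply */
static int da_reserve_space(struct inode_rsv *ei, long nrblocks)
{
	long total = ei->reserved_data_blocks + nrblocks;
	long mdblocks = calc_metadata_amount(total);
	long md_needed = mdblocks - ei->reserved_meta_blocks;

	if (fs_free_blocks < md_needed + nrblocks)
		return -1;		/* ENOSPC is reported at write time, not at flush time */

	fs_free_blocks -= md_needed + nrblocks;
	ei->reserved_data_blocks += nrblocks;
	ei->reserved_meta_blocks = mdblocks;
	return 0;
}

/* after real allocation (or truncate): return what is no longer needed */
static void da_release_space(struct inode_rsv *ei, long used, long to_free)
{
	long still_needed = calc_metadata_amount(ei->reserved_data_blocks - used - to_free);
	long mdb_free = ei->reserved_meta_blocks - still_needed;

	/* metadata blocks that were actually allocated stay charged */
	mdb_free -= ei->allocated_meta_blocks;

	fs_free_blocks += to_free + mdb_free;
	ei->reserved_data_blocks -= used + to_free;
	ei->reserved_meta_blocks = still_needed;
	ei->allocated_meta_blocks = 0;
}

int main(void)
{
	struct inode_rsv ei = {0, 0, 0};

	da_reserve_space(&ei, 2048);	/* buffered write of 2048 blocks */
	printf("after reserve:   free=%ld data=%ld meta=%ld\n",
	       fs_free_blocks, ei.reserved_data_blocks, ei.reserved_meta_blocks);

	ei.allocated_meta_blocks = 2;	/* writeback really consumed 2 metadata blocks */
	da_release_space(&ei, 2048, 0);	/* all 2048 data blocks got allocated */
	printf("after writeback: free=%ld data=%ld meta=%ld\n",
	       fs_free_blocks, ei.reserved_data_blocks, ei.reserved_meta_blocks);
	return 0;
}

The example reserves 2048 data blocks plus a worst-case metadata cushion, then releases the unused part of that cushion once a simulated writeback has allocated the blocks, which is the lifecycle the hunks below implement.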
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6369bacf0dcb..495ab21b9832 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1701,7 +1701,12 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	sbi = EXT4_SB(sb);
-	*count = ext4_has_free_blocks(sbi, *count);
+	if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
+		/*
+		 * With delalloc we already reserved the blocks
+		 */
+		*count = ext4_has_free_blocks(sbi, *count);
+	}
 	if (*count == 0) {
 		*errp = -ENOSPC;
 		return 0;	/*return with ENOSPC error */
@@ -1902,7 +1907,8 @@ allocated:
 	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+	if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+		percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
@@ -1976,40 +1982,49 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
 }
 
 /*
- * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
  *
  * @handle:		handle to this transaction
  * @inode:		file inode
  * @goal:		given target block(filesystem wide)
+ * @count:		total number of blocks need
  * @errp:		error code
  *
- * Return allocated block number on success
+ * Return 1st allocated block numberon success, *count stores total account
+ * error stores in errp pointer
  */
-ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, int *errp)
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+		ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
-	unsigned long count = 1;
-	return do_blk_alloc(handle, inode, 0, goal,
-			    &count, errp, EXT4_META_BLOCK);
+	ext4_fsblk_t ret;
+	ret = do_blk_alloc(handle, inode, 0, goal,
+				count, errp, EXT4_META_BLOCK);
+	/*
+	 * Account for the allocated meta blocks
+	 */
+	if (!(*errp)) {
+		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+		EXT4_I(inode)->i_allocated_meta_blocks += *count;
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	}
+	return ret;
 }
 
 /*
- * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
  *
  * @handle:		handle to this transaction
  * @inode:		file inode
 * @goal:		given target block(filesystem wide)
- * @count:		total number of blocks need
  * @errp:		error code
  *
- * Return 1st allocated block numberon success, *count stores total account
- * error stores in errp pointer
+ * Return allocated block number on success
  */
-ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, unsigned long *count, int *errp)
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+		ext4_fsblk_t goal, int *errp)
 {
-	return do_blk_alloc(handle, inode, 0, goal,
-			    count, errp, EXT4_META_BLOCK);
+	unsigned long count = 1;
+	return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
 }
 
 /*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5ed5108766c1..d3d23d73c08b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+						0, 0, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ee9576dc0ba1..0962f4e26579 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -74,6 +74,9 @@
 #define EXT4_MB_HINT_GOAL_ONLY		256
 /* goal is meaningful */
 #define EXT4_MB_HINT_TRY_GOAL		512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED	1024
+
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -1041,6 +1044,7 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
 
 
 /* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1234,7 +1238,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
 			sector_t block, unsigned long max_blocks,
 			struct buffer_head *bh, int create,
-			int extend_disksize);
+			int extend_disksize, int flag);
 #endif /* __KERNEL__ */
 
 #endif	/* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 75333b595fab..6c166c0a54b7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
 }
 
+extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index c2903ef72159..ef7409f0e7e4 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -163,6 +163,13 @@ struct ext4_inode_info {
 	/* mballoc */
 	struct list_head i_prealloc_list;
 	spinlock_t i_prealloc_lock;
+
+	/* allocation reservation info for delalloc */
+	unsigned long i_reserved_data_blocks;
+	unsigned long i_reserved_meta_blocks;
+	unsigned long i_allocated_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
+	spinlock_t i_block_reservation_lock;
 };
 
 #endif	/* _EXT4_I */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7844bbb2bac0..dabc3b68d249 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -248,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
 	return size;
 }
 
+/*
+ * Calculate the number of metadata blocks needed
+ * to allocate @blocks
+ * Worse case is one block per extent
+ */
+int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	int lcap, icap, rcap, leafs, idxs, num;
+	int newextents = blocks;
+
+	rcap = ext4_ext_space_root_idx(inode);
+	lcap = ext4_ext_space_block(inode);
+	icap = ext4_ext_space_block_idx(inode);
+
+	/* number of new leaf blocks needed */
+	num = leafs = (newextents + lcap - 1) / lcap;
+
+	/*
+	 * Worse case, we need separate index block(s)
+	 * to link all new leaf blocks
+	 */
+	idxs = (leafs + icap - 1) / icap;
+	do {
+		num += idxs;
+		idxs = (idxs + icap - 1) / icap;
+	} while (idxs > rcap);
+
+	return num;
+}
+
 static int
 ext4_ext_max_entries(struct inode *inode, int depth)
 {
@@ -2910,7 +2940,7 @@ retry:
 		}
 		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
+					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2bef4f879e4b..a6b800c58474 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,7 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "ext4_extents.h"
 
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 						loff_t new_size)
@@ -982,7 +983,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize)
+			int create, int extend_disksize, int flag)
 {
 	int retval;
 
@@ -1023,6 +1024,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 	 * with create == 1 flag.
 	 */
 	down_write((&EXT4_I(inode)->i_data_sem));
+
+	/*
+	 * if the caller is from delayed allocation writeout path
+	 * we have already reserved fs blocks for allocation
+	 * let the underlying get_block() function know to
+	 * avoid double accounting
+	 */
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
@@ -1044,6 +1054,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 						~EXT4_EXT_MIGRATE;
 		}
 	}
+
+	if (flag) {
+		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+		/*
+		 * Update reserved blocks/metadata blocks
+		 * after successful block allocation
+		 * which were deferred till now
+		 */
+		if ((retval > 0) && buffer_delay(bh))
+			ext4_da_release_space(inode, retval, 0);
+	}
+
 	up_write((&EXT4_I(inode)->i_data_sem));
 	return retval;
 }
@@ -1069,7 +1091,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
 	}
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0);
+					max_blocks, bh_result, create, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1095,7 +1117,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
 	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-				&dummy, create, 1);
+				&dummy, create, 1, 0);
 	/*
 	 * ext4_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
@@ -1409,6 +1431,122 @@ static int ext4_journalled_write_end(struct file *file,
 
 	return ret ? ret : copied;
 }
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ind_blks, dind_blks, tind_blks;
+
+	/* number of new indirect blocks needed */
+	ind_blks = (blocks + icap - 1) / icap;
+
+	dind_blks = (ind_blks + icap - 1) / icap;
+
+	tind_blks = 1;
+
+	return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+		return ext4_ext_calc_metadata_amount(inode, blocks);
+
+	return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	unsigned long md_needed, mdblocks, total = 0;
+
+	/*
+	 * recalculate the amount of metadata blocks to reserve
+	 * in order to allocate nrblocks
+	 * worse case is one extent per block
+	 */
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+	mdblocks = ext4_calc_metadata_amount(inode, total);
+	BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+	total = md_needed + nrblocks;
+
+	if (ext4_has_free_blocks(sbi, total) < total) {
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+		return -ENOSPC;
+	}
+
+	/* reduce fs free blocks counter */
+	percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+	EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	return 0;	/* success */
+}
+
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int total, mdb, mdb_free, release;
+
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	/* recalculate the number of metablocks still need to be reserved */
+	total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+	mdb = ext4_calc_metadata_amount(inode, total);
+
+	/* figure out how many metablocks to release */
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+	/* Account for allocated meta_blocks */
+	mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+	release = to_free + mdb_free;
+
+	/* update fs free blocks counter for truncate case */
+	percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+	/* update per-inode reservations */
+	BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+	EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+	EXT4_I(inode)->i_allocated_meta_blocks = 0;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
+static void ext4_da_page_release_reservation(struct page *page,
+						unsigned long offset)
+{
+	int to_release = 0;
+	struct buffer_head *head, *bh;
+	unsigned int curr_off = 0;
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+
+		if ((offset <= curr_off) && (buffer_delay(bh))) {
+			to_release++;
+			clear_buffer_delay(bh);
+		}
+		curr_off = next_off;
+	} while ((bh = bh->b_this_page) != head);
+	ext4_da_release_space(page->mapping->host, 0, to_release);
+}
 
 /*
  * Delayed allocation stuff
@@ -1829,14 +1967,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
-	if (ret == 0) {
-		/* the block isn't allocated yet, let's reserve space */
-		/* XXX: call reservation here */
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
+	if ((ret == 0) && !buffer_delay(bh_result)) {
+		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
 		 */
+		ret = ext4_da_reserve_space(inode, 1);
+		if (ret)
+			/* not enough space to reserve */
+			return ret;
+
 		map_bh(bh_result, inode->i_sb, 0);
 		set_buffer_new(bh_result);
 		set_buffer_delay(bh_result);
@@ -1847,7 +1989,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 
 	return ret;
 }
-
+#define EXT4_DELALLOC_RSVED	1
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -1865,7 +2007,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 	}
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				  bh_result, create, 0);
+				  bh_result, create, 0, EXT4_DELALLOC_RSVED);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 
@@ -1952,7 +2094,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	int ret;
+	int ret, retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
@@ -1963,6 +2105,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
+retry:
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
 	 * if there is delayed block allocation. But we still need
@@ -1988,6 +2131,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 		page_cache_release(page);
 	}
 
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 out:
 	return ret;
 }
@@ -2040,9 +2185,6 @@ static int ext4_da_write_end(struct file *file,
 
 static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
 {
-	struct buffer_head *head, *bh;
-	unsigned int curr_off = 0;
-
 	/*
 	 * Drop reserved blocks
 	 */
@@ -2050,21 +2192,7 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
 	if (!page_has_buffers(page))
 		goto out;
 
-	head = page_buffers(page);
-	bh = head;
-	do {
-		unsigned int next_off = curr_off + bh->b_size;
-
-		/*
-		 * is this block fully invalidated?
-		 */
-		if (offset <= curr_off && buffer_delay(bh)) {
-			clear_buffer_delay(bh);
-			/* XXX: add real stuff here */
-		}
-		curr_off = next_off;
-		bh = bh->b_this_page;
-	} while (bh != head);
+	ext4_da_page_release_reservation(page, offset);
 
 out:
 	ext4_invalidatepage(page, offset);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d254ca83d9e..8d141a25bbee 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2964,7 +2964,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+	/*
+	 * free blocks account has already be reduced/reserved
+	 * at write_begin() time for delayed allocation
+	 * do not double accounting
+	 */
+	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+		percpu_counter_sub(&sbi->s_freeblocks_counter,
+						ac->ac_b_ex.fe_len);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4169,7 +4177,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 						&(ar->len), errp);
 		return block;
 	}
-	ar->len = ext4_has_free_blocks(sbi, ar->len);
+	if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
+		/*
+		 * With delalloc we already reserved the blocks
+		 */
+		ar->len = ext4_has_free_blocks(sbi, ar->len);
+	}
 
 	if (ar->len == 0) {
 		*errp = -ENOSPC;
@@ -4186,6 +4199,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	}
 	inquota = ar->len;
 
+	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (!ac) {
 		ar->len = 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index de9d3d0eb203..25e2f2488cd2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -574,6 +574,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
 	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
+	ei->i_reserved_data_blocks = 0;
+	ei->i_reserved_meta_blocks = 0;
+	ei->i_allocated_meta_blocks = 0;
+	ei->i_delalloc_reserved_flag = 0;
+	spin_lock_init(&(ei->i_block_reservation_lock));
 	return &ei->vfs_inode;
 }
 
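For reference, the worst-case estimate performed by the ext4_ext_calc_metadata_amount() helper added in extents.c above can be reproduced standalone. The leaf/index/root capacities below are assumed example values chosen only to show the shape of the calculation; the real numbers depend on the block size and the on-disk extent and index record sizes.

/*
 * Worked example of the worst-case metadata estimate for extent-mapped
 * files: every data block is assumed to become its own extent, and index
 * levels are added until the tree fits under the inode's root index.
 * Capacities here are illustrative assumptions, not kernel constants.
 */
#include <stdio.h>

static int ext_calc_metadata_amount(int blocks, int lcap, int icap, int rcap)
{
	int newextents = blocks;			/* worst case: one extent per block */
	int leafs = (newextents + lcap - 1) / lcap;	/* leaf blocks to hold the extents */
	int num = leafs;
	int idxs = (leafs + icap - 1) / icap;

	/* add index levels until everything fits under the root index */
	do {
		num += idxs;
		idxs = (idxs + icap - 1) / icap;
	} while (idxs > rcap);

	return num;
}

int main(void)
{
	/* assumed capacities: 340 extents per leaf block, 340 entries per
	 * index block, 4 index slots in the inode's root */
	int lcap = 340, icap = 340, rcap = 4;
	int blocks;

	for (blocks = 1; blocks <= 1000000; blocks *= 100)
		printf("%7d data blocks -> up to %d metadata blocks reserved\n",
		       blocks, ext_calc_metadata_amount(blocks, lcap, icap, rcap));
	return 0;
}

With these assumed capacities the estimate stays small relative to the data (for example, on the order of a few thousand metadata blocks for a million data blocks), which is why reserving the worst case up front and returning the surplus after allocation is an acceptable trade-off.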