author	Amit Arora <aarora@in.ibm.com>	2007-07-17 21:42:41 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2007-07-17 21:42:41 -0400
commit	a2df2a63407803a833f82e1fa6693826c8c9d584 (patch)
tree	db761e8d82dbbe53e166511a9f8af9162541a212
parent	97ac73506c0ba93f30239bb57b4cfc5d73e68a62 (diff)
fallocate support in ext4
This patch implements the ->fallocate() inode operation in ext4, so that users of ext4 file systems can use the fallocate() system call for persistent preallocation. The current implementation supports preallocation only for regular files with extent maps; directories and block-mapped files are not yet supported. Only the FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are supported at present.

Signed-off-by: Amit Arora <aarora@in.ibm.com>
-rw-r--r--	fs/ext4/extents.c	249
-rw-r--r--	fs/ext4/file.c	1
-rw-r--r--	include/linux/ext4_fs.h	8
-rw-r--r--	include/linux/ext4_fs_extents.h	15
4 files changed, 232 insertions(+), 41 deletions(-)
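Before the patch itself, a minimal user-space sketch of the call this commit enables. It is not part of the commit; the mount point, file name, and sizes are hypothetical. posix_fallocate() uses the fallocate() system call where the kernel and file system support it, and otherwise falls back to writing zeroes, as the commit message notes.

/* Hypothetical example: reserve 16 MiB for a file on an ext4 mount. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/ext4/prealloc.dat", O_CREAT | O_WRONLY, 0644);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Preallocate 16 MiB starting at offset 0; returns an errno value. */
	int err = posix_fallocate(fd, 0, 16 * 1024 * 1024);
	if (err) {
		fprintf(stderr, "posix_fallocate: %s\n", strerror(err));
		close(fd);
		return EXIT_FAILURE;
	}

	close(fd);
	return EXIT_SUCCESS;
}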
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b9ce24129070..ba25832a756c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -39,6 +39,7 @@
 #include <linux/quotaops.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/falloc.h>
 #include <linux/ext4_fs_extents.h>
 #include <asm/uaccess.h>
 
@@ -282,7 +283,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	} else if (path->p_ext) {
 		ext_debug(" %d:%d:%llu ",
 			  le32_to_cpu(path->p_ext->ee_block),
-			  le16_to_cpu(path->p_ext->ee_len),
+			  ext4_ext_get_actual_len(path->p_ext),
 			  ext_pblock(path->p_ext));
 	} else
 		ext_debug(" []");
@@ -305,7 +306,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
 		ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
-			  le16_to_cpu(ex->ee_len), ext_pblock(ex));
+			  ext4_ext_get_actual_len(ex), ext_pblock(ex));
 	}
 	ext_debug("\n");
 }
@@ -425,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
 	ext_debug(" -> %d:%llu:%d ",
 		  le32_to_cpu(path->p_ext->ee_block),
 		  ext_pblock(path->p_ext),
-		  le16_to_cpu(path->p_ext->ee_len));
+		  ext4_ext_get_actual_len(path->p_ext));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -686,7 +687,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		ext_debug("move %d:%llu:%d in new leaf %llu\n",
 				le32_to_cpu(path[depth].p_ext->ee_block),
 				ext_pblock(path[depth].p_ext),
-				le16_to_cpu(path[depth].p_ext->ee_len),
+				ext4_ext_get_actual_len(path[depth].p_ext),
 				newblock);
 		/*memmove(ex++, path[depth].p_ext++,
 				sizeof(struct ext4_extent));
@@ -1106,7 +1107,19 @@ static int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-	if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) !=
+	unsigned short ext1_ee_len, ext2_ee_len;
+
+	/*
+	 * Make sure that either both extents are uninitialized, or
+	 * both are _not_.
+	 */
+	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+		return 0;
+
+	ext1_ee_len = ext4_ext_get_actual_len(ex1);
+	ext2_ee_len = ext4_ext_get_actual_len(ex2);
+
+	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
 			le32_to_cpu(ex2->ee_block))
 		return 0;
 
@@ -1115,14 +1128,14 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 * as an RO_COMPAT feature, refuse to merge to extents if
 	 * this can result in the top bit of ee_len being set.
 	 */
-	if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
+	if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN)
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (le16_to_cpu(ex1->ee_len) >= 4)
 		return 0;
 #endif
 
-	if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2))
+	if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
 		return 1;
 	return 0;
 }
@@ -1144,7 +1157,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
 	unsigned int ret = 0;
 
 	b1 = le32_to_cpu(newext->ee_block);
-	len1 = le16_to_cpu(newext->ee_len);
+	len1 = ext4_ext_get_actual_len(newext);
 	depth = ext_depth(inode);
 	if (!path[depth].p_ext)
 		goto out;
@@ -1191,8 +1204,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	struct ext4_extent *nearex; /* nearest extent */
 	struct ext4_ext_path *npath = NULL;
 	int depth, len, err, next;
+	unsigned uninitialized = 0;
 
-	BUG_ON(newext->ee_len == 0);
+	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1200,14 +1214,24 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	/* try to insert block into found extent and return */
 	if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
 		ext_debug("append %d block to %d:%d (from %llu)\n",
-				le16_to_cpu(newext->ee_len),
+				ext4_ext_get_actual_len(newext),
 				le32_to_cpu(ex->ee_block),
-				le16_to_cpu(ex->ee_len), ext_pblock(ex));
+				ext4_ext_get_actual_len(ex), ext_pblock(ex));
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
 			return err;
-		ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
-				+ le16_to_cpu(newext->ee_len));
+
+		/*
+		 * ext4_can_extents_be_merged should have checked that either
+		 * both extents are uninitialized, or both aren't. Thus we
+		 * need to check only one of them here.
+		 */
+		if (ext4_ext_is_uninitialized(ex))
+			uninitialized = 1;
+		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+				+ ext4_ext_get_actual_len(newext));
+		if (uninitialized)
+			ext4_ext_mark_uninitialized(ex);
 		eh = path[depth].p_hdr;
 		nearex = ex;
 		goto merge;
@@ -1263,7 +1287,7 @@ has_space:
 		ext_debug("first extent in the leaf: %d:%llu:%d\n",
 				le32_to_cpu(newext->ee_block),
 				ext_pblock(newext),
-				le16_to_cpu(newext->ee_len));
+				ext4_ext_get_actual_len(newext));
 		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
 	} else if (le32_to_cpu(newext->ee_block)
 			> le32_to_cpu(nearex->ee_block)) {
@@ -1276,7 +1300,7 @@ has_space:
 					"move %d from 0x%p to 0x%p\n",
 					le32_to_cpu(newext->ee_block),
 					ext_pblock(newext),
-					le16_to_cpu(newext->ee_len),
+					ext4_ext_get_actual_len(newext),
 					nearex, len, nearex + 1, nearex + 2);
 			memmove(nearex + 2, nearex + 1, len);
 		}
@@ -1289,7 +1313,7 @@ has_space:
 				"move %d from 0x%p to 0x%p\n",
 				le32_to_cpu(newext->ee_block),
 				ext_pblock(newext),
-				le16_to_cpu(newext->ee_len),
+				ext4_ext_get_actual_len(newext),
 				nearex, len, nearex + 1, nearex + 2);
 		memmove(nearex + 1, nearex, len);
 		path[depth].p_ext = nearex;
@@ -1308,8 +1332,13 @@ merge:
 		if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
 			break;
 		/* merge with next extent! */
-		nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len)
-				+ le16_to_cpu(nearex[1].ee_len));
+		if (ext4_ext_is_uninitialized(nearex))
+			uninitialized = 1;
+		nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex)
+				+ ext4_ext_get_actual_len(nearex + 1));
+		if (uninitialized)
+			ext4_ext_mark_uninitialized(nearex);
+
 		if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
 			len = (EXT_LAST_EXTENT(eh) - nearex - 1)
 				* sizeof(struct ext4_extent);
@@ -1379,8 +1408,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 			end = le32_to_cpu(ex->ee_block);
 			if (block + num < end)
 				end = block + num;
-		} else if (block >=
-			le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) {
+		} else if (block >= le32_to_cpu(ex->ee_block)
+					+ ext4_ext_get_actual_len(ex)) {
 			/* need to allocate space after found extent */
 			start = block;
 			end = block + num;
@@ -1392,7 +1421,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 			 * by found extent
 			 */
 			start = block;
-			end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len);
+			end = le32_to_cpu(ex->ee_block)
+				+ ext4_ext_get_actual_len(ex);
 			if (block + num < end)
 				end = block + num;
 			exists = 1;
@@ -1408,7 +1438,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 			cbex.ec_type = EXT4_EXT_CACHE_GAP;
 		} else {
 			cbex.ec_block = le32_to_cpu(ex->ee_block);
-			cbex.ec_len = le16_to_cpu(ex->ee_len);
+			cbex.ec_len = ext4_ext_get_actual_len(ex);
 			cbex.ec_start = ext_pblock(ex);
 			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
@@ -1481,15 +1511,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 		ext_debug("cache gap(before): %lu [%lu:%lu]",
 				(unsigned long) block,
 				(unsigned long) le32_to_cpu(ex->ee_block),
-				(unsigned long) le16_to_cpu(ex->ee_len));
+				(unsigned long) ext4_ext_get_actual_len(ex));
 	} else if (block >= le32_to_cpu(ex->ee_block)
-			+ le16_to_cpu(ex->ee_len)) {
+			+ ext4_ext_get_actual_len(ex)) {
 		lblock = le32_to_cpu(ex->ee_block)
-			+ le16_to_cpu(ex->ee_len);
+			+ ext4_ext_get_actual_len(ex);
 		len = ext4_ext_next_allocated_block(path);
 		ext_debug("cache gap(after): [%lu:%lu] %lu",
 				(unsigned long) le32_to_cpu(ex->ee_block),
-				(unsigned long) le16_to_cpu(ex->ee_len),
+				(unsigned long) ext4_ext_get_actual_len(ex),
 				(unsigned long) block);
 		BUG_ON(len == lblock);
 		len = len - lblock;
@@ -1619,12 +1649,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		unsigned long from, unsigned long to)
 {
 	struct buffer_head *bh;
+	unsigned short ee_len = ext4_ext_get_actual_len(ex);
 	int i;
 
 #ifdef EXTENTS_STATS
 	{
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-		unsigned short ee_len = le16_to_cpu(ex->ee_len);
 		spin_lock(&sbi->s_ext_stats_lock);
 		sbi->s_ext_blocks += ee_len;
 		sbi->s_ext_extents++;
@@ -1638,12 +1668,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	}
 #endif
 	if (from >= le32_to_cpu(ex->ee_block)
-	    && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* tail removal */
 		unsigned long num;
 		ext4_fsblk_t start;
-		num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;
-		start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num;
+		num = le32_to_cpu(ex->ee_block) + ee_len - from;
+		start = ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %lu blocks starting %llu\n", num, start);
 		for (i = 0; i < num; i++) {
 			bh = sb_find_get_block(inode->i_sb, start + i);
@@ -1651,12 +1681,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		}
 		ext4_free_blocks(handle, inode, start, num);
 	} else if (from == le32_to_cpu(ex->ee_block)
-		   && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		printk("strange request: removal %lu-%lu from %u:%u\n",
-			from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+			from, to, le32_to_cpu(ex->ee_block), ee_len);
 	} else {
 		printk("strange request: removal(2) %lu-%lu from %u:%u\n",
-			from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+			from, to, le32_to_cpu(ex->ee_block), ee_len);
 	}
 	return 0;
 }
@@ -1671,6 +1701,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	unsigned a, b, block, num;
 	unsigned long ex_ee_block;
 	unsigned short ex_ee_len;
+	unsigned uninitialized = 0;
 	struct ext4_extent *ex;
 
 	ext_debug("truncate since %lu in leaf\n", start);
@@ -1685,7 +1716,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	ex_ee_len = le16_to_cpu(ex->ee_len);
+	if (ext4_ext_is_uninitialized(ex))
+		uninitialized = 1;
+	ex_ee_len = ext4_ext_get_actual_len(ex);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
@@ -1753,6 +1786,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 
 		ex->ee_block = cpu_to_le32(block);
 		ex->ee_len = cpu_to_le16(num);
+		if (uninitialized)
+			ext4_ext_mark_uninitialized(ex);
 
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		if (err)
@@ -1762,7 +1797,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 				ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
-		ex_ee_len = le16_to_cpu(ex->ee_len);
+		ex_ee_len = ext4_ext_get_actual_len(ex);
 	}
 
 	if (correct_index && eh->eh_entries)
@@ -2038,7 +2073,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	if (ex) {
 		unsigned long ee_block = le32_to_cpu(ex->ee_block);
 		ext4_fsblk_t ee_start = ext_pblock(ex);
-		unsigned short ee_len = le16_to_cpu(ex->ee_len);
+		unsigned short ee_len;
 
 		/*
 		 * Allow future support for preallocated extents to be added
@@ -2046,8 +2081,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		 * Uninitialized extents are treated as holes, except that
 		 * we avoid (fail) allocating new blocks during a write.
 		 */
-		if (ee_len > EXT_MAX_LEN)
+		if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN)
 			goto out2;
+		ee_len = ext4_ext_get_actual_len(ex);
 		/* if found extent covers block, simply return it */
 		if (iblock >= ee_block && iblock < ee_block + ee_len) {
 			newblock = iblock - ee_block + ee_start;
@@ -2055,8 +2091,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			allocated = ee_len - (iblock - ee_block);
 			ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
 					ee_block, ee_len, newblock);
-			ext4_ext_put_in_cache(inode, ee_block, ee_len,
-						ee_start, EXT4_EXT_CACHE_EXTENT);
+			/* Do not put uninitialized extent in the cache */
+			if (!ext4_ext_is_uninitialized(ex))
+				ext4_ext_put_in_cache(inode, ee_block,
+							ee_len, ee_start,
+							EXT4_EXT_CACHE_EXTENT);
 			goto out;
 		}
 	}
@@ -2098,6 +2137,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(allocated);
+	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+		ext4_ext_mark_uninitialized(&newex);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err) {
 		/* free data blocks we just allocated */
@@ -2113,8 +2154,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	newblock = ext_pblock(&newex);
 	__set_bit(BH_New, &bh_result->b_state);
 
-	ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
-				EXT4_EXT_CACHE_EXTENT);
+	/* Cache only when it is _not_ an uninitialized extent */
+	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+					EXT4_EXT_CACHE_EXTENT);
 out:
 	if (allocated > max_blocks)
 		allocated = max_blocks;
@@ -2217,3 +2260,127 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
 
 	return needed;
 }
+
+/*
+ * preallocate space for a file. This implements ext4's fallocate inode
+ * operation, which gets called from sys_fallocate system call.
+ * For block-mapped files, posix_fallocate should fall back to the method
+ * of writing zeroes to the required new blocks (the same behavior which is
+ * expected for file systems which do not support fallocate() system call).
+ */
+long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+{
+	handle_t *handle;
+	ext4_fsblk_t block, max_blocks;
+	ext4_fsblk_t nblocks = 0;
+	int ret = 0;
+	int ret2 = 0;
+	int retries = 0;
+	struct buffer_head map_bh;
+	unsigned int credits, blkbits = inode->i_blkbits;
+
+	/*
+	 * currently supporting (pre)allocate mode for extent-based
+	 * files _only_
+	 */
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		return -EOPNOTSUPP;
+
+	/* preallocation to directories is currently not supported */
+	if (S_ISDIR(inode->i_mode))
+		return -ENODEV;
+
+	block = offset >> blkbits;
+	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
+			- block;
+
+	/*
+	 * credits to insert 1 extent into extent tree + buffers to be able to
+	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
+	 */
+	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+retry:
+	while (ret >= 0 && ret < max_blocks) {
+		block = block + ret;
+		max_blocks = max_blocks - ret;
+		handle = ext4_journal_start(inode, credits);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			break;
+		}
+
+		ret = ext4_ext_get_blocks(handle, inode, block,
+					  max_blocks, &map_bh,
+					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
+		WARN_ON(!ret);
+		if (!ret) {
+			ext4_error(inode->i_sb, "ext4_fallocate",
+				   "ext4_ext_get_blocks returned 0! inode#%lu"
+				   ", block=%llu, max_blocks=%llu",
+				   inode->i_ino, block, max_blocks);
+			ret = -EIO;
+			ext4_mark_inode_dirty(handle, inode);
+			ret2 = ext4_journal_stop(handle);
+			break;
+		}
+		if (ret > 0) {
+			/* check wrap through sign-bit/zero here */
+			if ((block + ret) < 0 || (block + ret) < block) {
+				ret = -EIO;
+				ext4_mark_inode_dirty(handle, inode);
+				ret2 = ext4_journal_stop(handle);
+				break;
+			}
+			if (buffer_new(&map_bh) && ((block + ret) >
+			    (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
+			     >> blkbits)))
+				nblocks = nblocks + ret;
+		}
+
+		/* Update ctime if new blocks get allocated */
+		if (nblocks) {
+			struct timespec now;
+
+			now = current_fs_time(inode->i_sb);
+			if (!timespec_equal(&inode->i_ctime, &now))
+				inode->i_ctime = now;
+		}
+
+		ext4_mark_inode_dirty(handle, inode);
+		ret2 = ext4_journal_stop(handle);
+		if (ret2)
+			break;
+	}
+
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+	/*
+	 * Time to update the file size.
+	 * Update only when preallocation was requested beyond the file size.
+	 */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (offset + len) > i_size_read(inode)) {
+		if (ret > 0) {
+			/*
+			 * if no error, we assume preallocation succeeded
+			 * completely
+			 */
+			mutex_lock(&inode->i_mutex);
+			i_size_write(inode, offset + len);
+			EXT4_I(inode)->i_disksize = i_size_read(inode);
+			mutex_unlock(&inode->i_mutex);
+		} else if (ret < 0 && nblocks) {
+			/* Handle partial allocation scenario */
+			loff_t newsize;
+
+			mutex_lock(&inode->i_mutex);
+			newsize = (nblocks << blkbits) + i_size_read(inode);
+			i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
+			EXT4_I(inode)->i_disksize = i_size_read(inode);
+			mutex_unlock(&inode->i_mutex);
+		}
+	}
+
+	return ret > 0 ? ret2 : ret;
+}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d4c8186aed64..1a81cd66d63b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext4_file_inode_operations = {
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext4_permission,
+	.fallocate	= ext4_fallocate,
 };
 
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f78625a..87c2d7a05b01 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -102,6 +102,7 @@
 			 EXT4_GOOD_OLD_FIRST_INO : \
 			 (s)->s_first_ino)
 #endif
+#define EXT4_BLOCK_ALIGN(size, blkbits)	ALIGN((size), (1 << (blkbits)))
 
 /*
  * Macro-instructions used to manage fragments
@@ -225,6 +226,11 @@ struct ext4_new_group_data {
 	__u32 free_blocks_count;
 };
 
+/*
+ * Following is used by preallocation code to tell get_blocks() that we
+ * want uninitialzed extents.
+ */
+#define EXT4_CREATE_UNINITIALIZED_EXT	2
 
 /*
  * ioctl commands
@@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 extern void ext4_ext_truncate(struct inode *, struct page *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
+extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+			   loff_t len);
 static inline int
 ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index acfe59740b03..e3d5afc6f23e 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -188,6 +188,21 @@ ext4_ext_invalidate_cache(struct inode *inode)
 	EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
 }
 
+static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+{
+	ext->ee_len |= cpu_to_le16(0x8000);
+}
+
+static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+{
+	return (int)(le16_to_cpu((ext)->ee_len) & 0x8000);
+}
+
+static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
+{
+	return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF);
+}
+
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
 extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
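A closing note, separate from the patch: the helpers added to ext4_fs_extents.h encode "uninitialized" in the most significant bit of the on-disk ee_len, leaving the low 15 bits for the actual extent length. The stand-alone sketch below only illustrates that bit layout; the macro names EXT4_UNINIT_EXT_FLAG and EXT4_EXT_LEN_MASK are invented here for the illustration and are not part of the patch.

/* Illustration only: how a 100-block uninitialized extent is encoded. */
#include <stdint.h>
#include <stdio.h>

#define EXT4_UNINIT_EXT_FLAG	0x8000u	/* hypothetical name for the top bit */
#define EXT4_EXT_LEN_MASK	0x7FFFu	/* hypothetical name for the length mask */

int main(void)
{
	uint16_t ee_len = 100;			/* extent length in blocks */

	ee_len |= EXT4_UNINIT_EXT_FLAG;		/* ext4_ext_mark_uninitialized() */
	printf("raw ee_len    = 0x%04x\n", ee_len);
	printf("uninitialized = %d\n", (ee_len & EXT4_UNINIT_EXT_FLAG) != 0);
	printf("actual length = %u\n", ee_len & EXT4_EXT_LEN_MASK);
	return 0;
}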