aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/extents.c249
-rw-r--r--fs/ext4/file.c1
-rw-r--r--include/linux/ext4_fs.h8
-rw-r--r--include/linux/ext4_fs_extents.h15
4 files changed, 232 insertions, 41 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b9ce24129070..ba25832a756c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -39,6 +39,7 @@
39#include <linux/quotaops.h> 39#include <linux/quotaops.h>
40#include <linux/string.h> 40#include <linux/string.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/falloc.h>
42#include <linux/ext4_fs_extents.h> 43#include <linux/ext4_fs_extents.h>
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
44 45
@@ -282,7 +283,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
282 } else if (path->p_ext) { 283 } else if (path->p_ext) {
283 ext_debug(" %d:%d:%llu ", 284 ext_debug(" %d:%d:%llu ",
284 le32_to_cpu(path->p_ext->ee_block), 285 le32_to_cpu(path->p_ext->ee_block),
285 le16_to_cpu(path->p_ext->ee_len), 286 ext4_ext_get_actual_len(path->p_ext),
286 ext_pblock(path->p_ext)); 287 ext_pblock(path->p_ext));
287 } else 288 } else
288 ext_debug(" []"); 289 ext_debug(" []");
@@ -305,7 +306,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
305 306
306 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 307 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
307 ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), 308 ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
308 le16_to_cpu(ex->ee_len), ext_pblock(ex)); 309 ext4_ext_get_actual_len(ex), ext_pblock(ex));
309 } 310 }
310 ext_debug("\n"); 311 ext_debug("\n");
311} 312}
@@ -425,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
425 ext_debug(" -> %d:%llu:%d ", 426 ext_debug(" -> %d:%llu:%d ",
426 le32_to_cpu(path->p_ext->ee_block), 427 le32_to_cpu(path->p_ext->ee_block),
427 ext_pblock(path->p_ext), 428 ext_pblock(path->p_ext),
428 le16_to_cpu(path->p_ext->ee_len)); 429 ext4_ext_get_actual_len(path->p_ext));
429 430
430#ifdef CHECK_BINSEARCH 431#ifdef CHECK_BINSEARCH
431 { 432 {
@@ -686,7 +687,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
686 ext_debug("move %d:%llu:%d in new leaf %llu\n", 687 ext_debug("move %d:%llu:%d in new leaf %llu\n",
687 le32_to_cpu(path[depth].p_ext->ee_block), 688 le32_to_cpu(path[depth].p_ext->ee_block),
688 ext_pblock(path[depth].p_ext), 689 ext_pblock(path[depth].p_ext),
689 le16_to_cpu(path[depth].p_ext->ee_len), 690 ext4_ext_get_actual_len(path[depth].p_ext),
690 newblock); 691 newblock);
691 /*memmove(ex++, path[depth].p_ext++, 692 /*memmove(ex++, path[depth].p_ext++,
692 sizeof(struct ext4_extent)); 693 sizeof(struct ext4_extent));
@@ -1106,7 +1107,19 @@ static int
1106ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1107ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1107 struct ext4_extent *ex2) 1108 struct ext4_extent *ex2)
1108{ 1109{
1109 if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != 1110 unsigned short ext1_ee_len, ext2_ee_len;
1111
1112 /*
1113 * Make sure that either both extents are uninitialized, or
1114 * both are _not_.
1115 */
1116 if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
1117 return 0;
1118
1119 ext1_ee_len = ext4_ext_get_actual_len(ex1);
1120 ext2_ee_len = ext4_ext_get_actual_len(ex2);
1121
1122 if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
1110 le32_to_cpu(ex2->ee_block)) 1123 le32_to_cpu(ex2->ee_block))
1111 return 0; 1124 return 0;
1112 1125
@@ -1115,14 +1128,14 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1115 * as an RO_COMPAT feature, refuse to merge to extents if 1128 * as an RO_COMPAT feature, refuse to merge to extents if
1116 * this can result in the top bit of ee_len being set. 1129 * this can result in the top bit of ee_len being set.
1117 */ 1130 */
1118 if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) 1131 if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN)
1119 return 0; 1132 return 0;
1120#ifdef AGGRESSIVE_TEST 1133#ifdef AGGRESSIVE_TEST
1121 if (le16_to_cpu(ex1->ee_len) >= 4) 1134 if (le16_to_cpu(ex1->ee_len) >= 4)
1122 return 0; 1135 return 0;
1123#endif 1136#endif
1124 1137
1125 if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) 1138 if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
1126 return 1; 1139 return 1;
1127 return 0; 1140 return 0;
1128} 1141}
@@ -1144,7 +1157,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
1144 unsigned int ret = 0; 1157 unsigned int ret = 0;
1145 1158
1146 b1 = le32_to_cpu(newext->ee_block); 1159 b1 = le32_to_cpu(newext->ee_block);
1147 len1 = le16_to_cpu(newext->ee_len); 1160 len1 = ext4_ext_get_actual_len(newext);
1148 depth = ext_depth(inode); 1161 depth = ext_depth(inode);
1149 if (!path[depth].p_ext) 1162 if (!path[depth].p_ext)
1150 goto out; 1163 goto out;
@@ -1191,8 +1204,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1191 struct ext4_extent *nearex; /* nearest extent */ 1204 struct ext4_extent *nearex; /* nearest extent */
1192 struct ext4_ext_path *npath = NULL; 1205 struct ext4_ext_path *npath = NULL;
1193 int depth, len, err, next; 1206 int depth, len, err, next;
1207 unsigned uninitialized = 0;
1194 1208
1195 BUG_ON(newext->ee_len == 0); 1209 BUG_ON(ext4_ext_get_actual_len(newext) == 0);
1196 depth = ext_depth(inode); 1210 depth = ext_depth(inode);
1197 ex = path[depth].p_ext; 1211 ex = path[depth].p_ext;
1198 BUG_ON(path[depth].p_hdr == NULL); 1212 BUG_ON(path[depth].p_hdr == NULL);
@@ -1200,14 +1214,24 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1200 /* try to insert block into found extent and return */ 1214 /* try to insert block into found extent and return */
1201 if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { 1215 if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
1202 ext_debug("append %d block to %d:%d (from %llu)\n", 1216 ext_debug("append %d block to %d:%d (from %llu)\n",
1203 le16_to_cpu(newext->ee_len), 1217 ext4_ext_get_actual_len(newext),
1204 le32_to_cpu(ex->ee_block), 1218 le32_to_cpu(ex->ee_block),
1205 le16_to_cpu(ex->ee_len), ext_pblock(ex)); 1219 ext4_ext_get_actual_len(ex), ext_pblock(ex));
1206 err = ext4_ext_get_access(handle, inode, path + depth); 1220 err = ext4_ext_get_access(handle, inode, path + depth);
1207 if (err) 1221 if (err)
1208 return err; 1222 return err;
1209 ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) 1223
1210 + le16_to_cpu(newext->ee_len)); 1224 /*
1225 * ext4_can_extents_be_merged should have checked that either
1226 * both extents are uninitialized, or both aren't. Thus we
1227 * need to check only one of them here.
1228 */
1229 if (ext4_ext_is_uninitialized(ex))
1230 uninitialized = 1;
1231 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1232 + ext4_ext_get_actual_len(newext));
1233 if (uninitialized)
1234 ext4_ext_mark_uninitialized(ex);
1211 eh = path[depth].p_hdr; 1235 eh = path[depth].p_hdr;
1212 nearex = ex; 1236 nearex = ex;
1213 goto merge; 1237 goto merge;
@@ -1263,7 +1287,7 @@ has_space:
1263 ext_debug("first extent in the leaf: %d:%llu:%d\n", 1287 ext_debug("first extent in the leaf: %d:%llu:%d\n",
1264 le32_to_cpu(newext->ee_block), 1288 le32_to_cpu(newext->ee_block),
1265 ext_pblock(newext), 1289 ext_pblock(newext),
1266 le16_to_cpu(newext->ee_len)); 1290 ext4_ext_get_actual_len(newext));
1267 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1291 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1268 } else if (le32_to_cpu(newext->ee_block) 1292 } else if (le32_to_cpu(newext->ee_block)
1269 > le32_to_cpu(nearex->ee_block)) { 1293 > le32_to_cpu(nearex->ee_block)) {
@@ -1276,7 +1300,7 @@ has_space:
1276 "move %d from 0x%p to 0x%p\n", 1300 "move %d from 0x%p to 0x%p\n",
1277 le32_to_cpu(newext->ee_block), 1301 le32_to_cpu(newext->ee_block),
1278 ext_pblock(newext), 1302 ext_pblock(newext),
1279 le16_to_cpu(newext->ee_len), 1303 ext4_ext_get_actual_len(newext),
1280 nearex, len, nearex + 1, nearex + 2); 1304 nearex, len, nearex + 1, nearex + 2);
1281 memmove(nearex + 2, nearex + 1, len); 1305 memmove(nearex + 2, nearex + 1, len);
1282 } 1306 }
@@ -1289,7 +1313,7 @@ has_space:
1289 "move %d from 0x%p to 0x%p\n", 1313 "move %d from 0x%p to 0x%p\n",
1290 le32_to_cpu(newext->ee_block), 1314 le32_to_cpu(newext->ee_block),
1291 ext_pblock(newext), 1315 ext_pblock(newext),
1292 le16_to_cpu(newext->ee_len), 1316 ext4_ext_get_actual_len(newext),
1293 nearex, len, nearex + 1, nearex + 2); 1317 nearex, len, nearex + 1, nearex + 2);
1294 memmove(nearex + 1, nearex, len); 1318 memmove(nearex + 1, nearex, len);
1295 path[depth].p_ext = nearex; 1319 path[depth].p_ext = nearex;
@@ -1308,8 +1332,13 @@ merge:
1308 if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) 1332 if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
1309 break; 1333 break;
1310 /* merge with next extent! */ 1334 /* merge with next extent! */
1311 nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) 1335 if (ext4_ext_is_uninitialized(nearex))
1312 + le16_to_cpu(nearex[1].ee_len)); 1336 uninitialized = 1;
1337 nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex)
1338 + ext4_ext_get_actual_len(nearex + 1));
1339 if (uninitialized)
1340 ext4_ext_mark_uninitialized(nearex);
1341
1313 if (nearex + 1 < EXT_LAST_EXTENT(eh)) { 1342 if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
1314 len = (EXT_LAST_EXTENT(eh) - nearex - 1) 1343 len = (EXT_LAST_EXTENT(eh) - nearex - 1)
1315 * sizeof(struct ext4_extent); 1344 * sizeof(struct ext4_extent);
@@ -1379,8 +1408,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
1379 end = le32_to_cpu(ex->ee_block); 1408 end = le32_to_cpu(ex->ee_block);
1380 if (block + num < end) 1409 if (block + num < end)
1381 end = block + num; 1410 end = block + num;
1382 } else if (block >= 1411 } else if (block >= le32_to_cpu(ex->ee_block)
1383 le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { 1412 + ext4_ext_get_actual_len(ex)) {
1384 /* need to allocate space after found extent */ 1413 /* need to allocate space after found extent */
1385 start = block; 1414 start = block;
1386 end = block + num; 1415 end = block + num;
@@ -1392,7 +1421,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
1392 * by found extent 1421 * by found extent
1393 */ 1422 */
1394 start = block; 1423 start = block;
1395 end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); 1424 end = le32_to_cpu(ex->ee_block)
1425 + ext4_ext_get_actual_len(ex);
1396 if (block + num < end) 1426 if (block + num < end)
1397 end = block + num; 1427 end = block + num;
1398 exists = 1; 1428 exists = 1;
@@ -1408,7 +1438,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
1408 cbex.ec_type = EXT4_EXT_CACHE_GAP; 1438 cbex.ec_type = EXT4_EXT_CACHE_GAP;
1409 } else { 1439 } else {
1410 cbex.ec_block = le32_to_cpu(ex->ee_block); 1440 cbex.ec_block = le32_to_cpu(ex->ee_block);
1411 cbex.ec_len = le16_to_cpu(ex->ee_len); 1441 cbex.ec_len = ext4_ext_get_actual_len(ex);
1412 cbex.ec_start = ext_pblock(ex); 1442 cbex.ec_start = ext_pblock(ex);
1413 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1443 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1414 } 1444 }
@@ -1481,15 +1511,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
1481 ext_debug("cache gap(before): %lu [%lu:%lu]", 1511 ext_debug("cache gap(before): %lu [%lu:%lu]",
1482 (unsigned long) block, 1512 (unsigned long) block,
1483 (unsigned long) le32_to_cpu(ex->ee_block), 1513 (unsigned long) le32_to_cpu(ex->ee_block),
1484 (unsigned long) le16_to_cpu(ex->ee_len)); 1514 (unsigned long) ext4_ext_get_actual_len(ex));
1485 } else if (block >= le32_to_cpu(ex->ee_block) 1515 } else if (block >= le32_to_cpu(ex->ee_block)
1486 + le16_to_cpu(ex->ee_len)) { 1516 + ext4_ext_get_actual_len(ex)) {
1487 lblock = le32_to_cpu(ex->ee_block) 1517 lblock = le32_to_cpu(ex->ee_block)
1488 + le16_to_cpu(ex->ee_len); 1518 + ext4_ext_get_actual_len(ex);
1489 len = ext4_ext_next_allocated_block(path); 1519 len = ext4_ext_next_allocated_block(path);
1490 ext_debug("cache gap(after): [%lu:%lu] %lu", 1520 ext_debug("cache gap(after): [%lu:%lu] %lu",
1491 (unsigned long) le32_to_cpu(ex->ee_block), 1521 (unsigned long) le32_to_cpu(ex->ee_block),
1492 (unsigned long) le16_to_cpu(ex->ee_len), 1522 (unsigned long) ext4_ext_get_actual_len(ex),
1493 (unsigned long) block); 1523 (unsigned long) block);
1494 BUG_ON(len == lblock); 1524 BUG_ON(len == lblock);
1495 len = len - lblock; 1525 len = len - lblock;
@@ -1619,12 +1649,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
1619 unsigned long from, unsigned long to) 1649 unsigned long from, unsigned long to)
1620{ 1650{
1621 struct buffer_head *bh; 1651 struct buffer_head *bh;
1652 unsigned short ee_len = ext4_ext_get_actual_len(ex);
1622 int i; 1653 int i;
1623 1654
1624#ifdef EXTENTS_STATS 1655#ifdef EXTENTS_STATS
1625 { 1656 {
1626 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1657 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1627 unsigned short ee_len = le16_to_cpu(ex->ee_len);
1628 spin_lock(&sbi->s_ext_stats_lock); 1658 spin_lock(&sbi->s_ext_stats_lock);
1629 sbi->s_ext_blocks += ee_len; 1659 sbi->s_ext_blocks += ee_len;
1630 sbi->s_ext_extents++; 1660 sbi->s_ext_extents++;
@@ -1638,12 +1668,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
1638 } 1668 }
1639#endif 1669#endif
1640 if (from >= le32_to_cpu(ex->ee_block) 1670 if (from >= le32_to_cpu(ex->ee_block)
1641 && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { 1671 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
1642 /* tail removal */ 1672 /* tail removal */
1643 unsigned long num; 1673 unsigned long num;
1644 ext4_fsblk_t start; 1674 ext4_fsblk_t start;
1645 num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; 1675 num = le32_to_cpu(ex->ee_block) + ee_len - from;
1646 start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; 1676 start = ext_pblock(ex) + ee_len - num;
1647 ext_debug("free last %lu blocks starting %llu\n", num, start); 1677 ext_debug("free last %lu blocks starting %llu\n", num, start);
1648 for (i = 0; i < num; i++) { 1678 for (i = 0; i < num; i++) {
1649 bh = sb_find_get_block(inode->i_sb, start + i); 1679 bh = sb_find_get_block(inode->i_sb, start + i);
@@ -1651,12 +1681,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
1651 } 1681 }
1652 ext4_free_blocks(handle, inode, start, num); 1682 ext4_free_blocks(handle, inode, start, num);
1653 } else if (from == le32_to_cpu(ex->ee_block) 1683 } else if (from == le32_to_cpu(ex->ee_block)
1654 && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { 1684 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
1655 printk("strange request: removal %lu-%lu from %u:%u\n", 1685 printk("strange request: removal %lu-%lu from %u:%u\n",
1656 from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); 1686 from, to, le32_to_cpu(ex->ee_block), ee_len);
1657 } else { 1687 } else {
1658 printk("strange request: removal(2) %lu-%lu from %u:%u\n", 1688 printk("strange request: removal(2) %lu-%lu from %u:%u\n",
1659 from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); 1689 from, to, le32_to_cpu(ex->ee_block), ee_len);
1660 } 1690 }
1661 return 0; 1691 return 0;
1662} 1692}
@@ -1671,6 +1701,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1671 unsigned a, b, block, num; 1701 unsigned a, b, block, num;
1672 unsigned long ex_ee_block; 1702 unsigned long ex_ee_block;
1673 unsigned short ex_ee_len; 1703 unsigned short ex_ee_len;
1704 unsigned uninitialized = 0;
1674 struct ext4_extent *ex; 1705 struct ext4_extent *ex;
1675 1706
1676 ext_debug("truncate since %lu in leaf\n", start); 1707 ext_debug("truncate since %lu in leaf\n", start);
@@ -1685,7 +1716,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1685 ex = EXT_LAST_EXTENT(eh); 1716 ex = EXT_LAST_EXTENT(eh);
1686 1717
1687 ex_ee_block = le32_to_cpu(ex->ee_block); 1718 ex_ee_block = le32_to_cpu(ex->ee_block);
1688 ex_ee_len = le16_to_cpu(ex->ee_len); 1719 if (ext4_ext_is_uninitialized(ex))
1720 uninitialized = 1;
1721 ex_ee_len = ext4_ext_get_actual_len(ex);
1689 1722
1690 while (ex >= EXT_FIRST_EXTENT(eh) && 1723 while (ex >= EXT_FIRST_EXTENT(eh) &&
1691 ex_ee_block + ex_ee_len > start) { 1724 ex_ee_block + ex_ee_len > start) {
@@ -1753,6 +1786,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1753 1786
1754 ex->ee_block = cpu_to_le32(block); 1787 ex->ee_block = cpu_to_le32(block);
1755 ex->ee_len = cpu_to_le16(num); 1788 ex->ee_len = cpu_to_le16(num);
1789 if (uninitialized)
1790 ext4_ext_mark_uninitialized(ex);
1756 1791
1757 err = ext4_ext_dirty(handle, inode, path + depth); 1792 err = ext4_ext_dirty(handle, inode, path + depth);
1758 if (err) 1793 if (err)
@@ -1762,7 +1797,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1762 ext_pblock(ex)); 1797 ext_pblock(ex));
1763 ex--; 1798 ex--;
1764 ex_ee_block = le32_to_cpu(ex->ee_block); 1799 ex_ee_block = le32_to_cpu(ex->ee_block);
1765 ex_ee_len = le16_to_cpu(ex->ee_len); 1800 ex_ee_len = ext4_ext_get_actual_len(ex);
1766 } 1801 }
1767 1802
1768 if (correct_index && eh->eh_entries) 1803 if (correct_index && eh->eh_entries)
@@ -2038,7 +2073,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2038 if (ex) { 2073 if (ex) {
2039 unsigned long ee_block = le32_to_cpu(ex->ee_block); 2074 unsigned long ee_block = le32_to_cpu(ex->ee_block);
2040 ext4_fsblk_t ee_start = ext_pblock(ex); 2075 ext4_fsblk_t ee_start = ext_pblock(ex);
2041 unsigned short ee_len = le16_to_cpu(ex->ee_len); 2076 unsigned short ee_len;
2042 2077
2043 /* 2078 /*
2044 * Allow future support for preallocated extents to be added 2079 * Allow future support for preallocated extents to be added
@@ -2046,8 +2081,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2046 * Uninitialized extents are treated as holes, except that 2081 * Uninitialized extents are treated as holes, except that
2047 * we avoid (fail) allocating new blocks during a write. 2082 * we avoid (fail) allocating new blocks during a write.
2048 */ 2083 */
2049 if (ee_len > EXT_MAX_LEN) 2084 if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN)
2050 goto out2; 2085 goto out2;
2086 ee_len = ext4_ext_get_actual_len(ex);
2051 /* if found extent covers block, simply return it */ 2087 /* if found extent covers block, simply return it */
2052 if (iblock >= ee_block && iblock < ee_block + ee_len) { 2088 if (iblock >= ee_block && iblock < ee_block + ee_len) {
2053 newblock = iblock - ee_block + ee_start; 2089 newblock = iblock - ee_block + ee_start;
@@ -2055,8 +2091,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2055 allocated = ee_len - (iblock - ee_block); 2091 allocated = ee_len - (iblock - ee_block);
2056 ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, 2092 ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
2057 ee_block, ee_len, newblock); 2093 ee_block, ee_len, newblock);
2058 ext4_ext_put_in_cache(inode, ee_block, ee_len, 2094 /* Do not put uninitialized extent in the cache */
2059 ee_start, EXT4_EXT_CACHE_EXTENT); 2095 if (!ext4_ext_is_uninitialized(ex))
2096 ext4_ext_put_in_cache(inode, ee_block,
2097 ee_len, ee_start,
2098 EXT4_EXT_CACHE_EXTENT);
2060 goto out; 2099 goto out;
2061 } 2100 }
2062 } 2101 }
@@ -2098,6 +2137,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2098 /* try to insert new extent into found leaf and return */ 2137 /* try to insert new extent into found leaf and return */
2099 ext4_ext_store_pblock(&newex, newblock); 2138 ext4_ext_store_pblock(&newex, newblock);
2100 newex.ee_len = cpu_to_le16(allocated); 2139 newex.ee_len = cpu_to_le16(allocated);
2140 if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
2141 ext4_ext_mark_uninitialized(&newex);
2101 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2142 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2102 if (err) { 2143 if (err) {
2103 /* free data blocks we just allocated */ 2144 /* free data blocks we just allocated */
@@ -2113,8 +2154,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2113 newblock = ext_pblock(&newex); 2154 newblock = ext_pblock(&newex);
2114 __set_bit(BH_New, &bh_result->b_state); 2155 __set_bit(BH_New, &bh_result->b_state);
2115 2156
2116 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 2157 /* Cache only when it is _not_ an uninitialized extent */
2117 EXT4_EXT_CACHE_EXTENT); 2158 if (create != EXT4_CREATE_UNINITIALIZED_EXT)
2159 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2160 EXT4_EXT_CACHE_EXTENT);
2118out: 2161out:
2119 if (allocated > max_blocks) 2162 if (allocated > max_blocks)
2120 allocated = max_blocks; 2163 allocated = max_blocks;
@@ -2217,3 +2260,127 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2217 2260
2218 return needed; 2261 return needed;
2219} 2262}
2263
2264/*
2265 * preallocate space for a file. This implements ext4's fallocate inode
2266 * operation, which gets called from sys_fallocate system call.
2267 * For block-mapped files, posix_fallocate should fall back to the method
2268 * of writing zeroes to the required new blocks (the same behavior which is
2269 * expected for file systems which do not support fallocate() system call).
2270 */
2271long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2272{
2273 handle_t *handle;
2274 ext4_fsblk_t block, max_blocks;
2275 ext4_fsblk_t nblocks = 0;
2276 int ret = 0;
2277 int ret2 = 0;
2278 int retries = 0;
2279 struct buffer_head map_bh;
2280 unsigned int credits, blkbits = inode->i_blkbits;
2281
2282 /*
2283 * currently supporting (pre)allocate mode for extent-based
2284 * files _only_
2285 */
2286 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
2287 return -EOPNOTSUPP;
2288
2289 /* preallocation to directories is currently not supported */
2290 if (S_ISDIR(inode->i_mode))
2291 return -ENODEV;
2292
2293 block = offset >> blkbits;
2294 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2295 - block;
2296
2297 /*
2298 * credits to insert 1 extent into extent tree + buffers to be able to
2299 * modify 1 super block, 1 block bitmap and 1 group descriptor.
2300 */
2301 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
2302retry:
2303 while (ret >= 0 && ret < max_blocks) {
2304 block = block + ret;
2305 max_blocks = max_blocks - ret;
2306 handle = ext4_journal_start(inode, credits);
2307 if (IS_ERR(handle)) {
2308 ret = PTR_ERR(handle);
2309 break;
2310 }
2311
2312 ret = ext4_ext_get_blocks(handle, inode, block,
2313 max_blocks, &map_bh,
2314 EXT4_CREATE_UNINITIALIZED_EXT, 0);
2315 WARN_ON(!ret);
2316 if (!ret) {
2317 ext4_error(inode->i_sb, "ext4_fallocate",
2318 "ext4_ext_get_blocks returned 0! inode#%lu"
2319 ", block=%llu, max_blocks=%llu",
2320 inode->i_ino, block, max_blocks);
2321 ret = -EIO;
2322 ext4_mark_inode_dirty(handle, inode);
2323 ret2 = ext4_journal_stop(handle);
2324 break;
2325 }
2326 if (ret > 0) {
2327 /* check wrap through sign-bit/zero here */
2328 if ((block + ret) < 0 || (block + ret) < block) {
2329 ret = -EIO;
2330 ext4_mark_inode_dirty(handle, inode);
2331 ret2 = ext4_journal_stop(handle);
2332 break;
2333 }
2334 if (buffer_new(&map_bh) && ((block + ret) >
2335 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2336 >> blkbits)))
2337 nblocks = nblocks + ret;
2338 }
2339
2340 /* Update ctime if new blocks get allocated */
2341 if (nblocks) {
2342 struct timespec now;
2343
2344 now = current_fs_time(inode->i_sb);
2345 if (!timespec_equal(&inode->i_ctime, &now))
2346 inode->i_ctime = now;
2347 }
2348
2349 ext4_mark_inode_dirty(handle, inode);
2350 ret2 = ext4_journal_stop(handle);
2351 if (ret2)
2352 break;
2353 }
2354
2355 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2356 goto retry;
2357
2358 /*
2359 * Time to update the file size.
2360 * Update only when preallocation was requested beyond the file size.
2361 */
2362 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2363 (offset + len) > i_size_read(inode)) {
2364 if (ret > 0) {
2365 /*
2366 * if no error, we assume preallocation succeeded
2367 * completely
2368 */
2369 mutex_lock(&inode->i_mutex);
2370 i_size_write(inode, offset + len);
2371 EXT4_I(inode)->i_disksize = i_size_read(inode);
2372 mutex_unlock(&inode->i_mutex);
2373 } else if (ret < 0 && nblocks) {
2374 /* Handle partial allocation scenario */
2375 loff_t newsize;
2376
2377 mutex_lock(&inode->i_mutex);
2378 newsize = (nblocks << blkbits) + i_size_read(inode);
2379 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2380 EXT4_I(inode)->i_disksize = i_size_read(inode);
2381 mutex_unlock(&inode->i_mutex);
2382 }
2383 }
2384
2385 return ret > 0 ? ret2 : ret;
2386}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d4c8186aed64..1a81cd66d63b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext4_file_inode_operations = {
134 .removexattr = generic_removexattr, 134 .removexattr = generic_removexattr,
135#endif 135#endif
136 .permission = ext4_permission, 136 .permission = ext4_permission,
137 .fallocate = ext4_fallocate,
137}; 138};
138 139
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f78625a..87c2d7a05b01 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -102,6 +102,7 @@
102 EXT4_GOOD_OLD_FIRST_INO : \ 102 EXT4_GOOD_OLD_FIRST_INO : \
103 (s)->s_first_ino) 103 (s)->s_first_ino)
104#endif 104#endif
105#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
105 106
106/* 107/*
107 * Macro-instructions used to manage fragments 108 * Macro-instructions used to manage fragments
@@ -225,6 +226,11 @@ struct ext4_new_group_data {
225 __u32 free_blocks_count; 226 __u32 free_blocks_count;
226}; 227};
227 228
229/*
230 * Following is used by preallocation code to tell get_blocks() that we
231 * want uninitialized extents.
232 */
233#define EXT4_CREATE_UNINITIALIZED_EXT 2
228 234
229/* 235/*
230 * ioctl commands 236 * ioctl commands
@@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
983extern void ext4_ext_truncate(struct inode *, struct page *); 989extern void ext4_ext_truncate(struct inode *, struct page *);
984extern void ext4_ext_init(struct super_block *); 990extern void ext4_ext_init(struct super_block *);
985extern void ext4_ext_release(struct super_block *); 991extern void ext4_ext_release(struct super_block *);
992extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
993 loff_t len);
986static inline int 994static inline int
987ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 995ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
988 unsigned long max_blocks, struct buffer_head *bh, 996 unsigned long max_blocks, struct buffer_head *bh,
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index acfe59740b03..e3d5afc6f23e 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -188,6 +188,21 @@ ext4_ext_invalidate_cache(struct inode *inode)
188 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; 188 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
189} 189}
190 190
191static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
192{
193 ext->ee_len |= cpu_to_le16(0x8000);
194}
195
196static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
197{
198 return (int)(le16_to_cpu((ext)->ee_len) & 0x8000);
199}
200
201static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
202{
203 return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF);
204}
205
191extern int ext4_extent_tree_init(handle_t *, struct inode *); 206extern int ext4_extent_tree_init(handle_t *, struct inode *);
192extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 207extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
193extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); 208extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);