Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--  fs/ext4/extents.c  351
1 file changed, 273 insertions, 78 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..236b834b4ca8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		if (S_ISREG(inode->i_mode))
 			block_group++;
 	}
-	bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
-		le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
 	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
 	/*
@@ -296,29 +295,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	int lcap, icap, rcap, leafs, idxs, num;
-	int newextents = blocks;
-
-	rcap = ext4_ext_space_root_idx(inode, 0);
-	lcap = ext4_ext_space_block(inode, 0);
-	icap = ext4_ext_space_block_idx(inode, 0);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int idxs, num = 0;
 
-	/* number of new leaf blocks needed */
-	num = leafs = (newextents + lcap - 1) / lcap;
+	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+		/ sizeof(struct ext4_extent_idx));
 
 	/*
-	 * Worse case, we need separate index block(s)
-	 * to link all new leaf blocks
+	 * If the new delayed allocation block is contiguous with the
+	 * previous da block, it can share index blocks with the
+	 * previous block, so we only need to allocate a new index
+	 * block every idxs leaf blocks.  At ldxs**2 blocks, we need
+	 * an additional index block, and at ldxs**3 blocks, yet
+	 * another index blocks.
 	 */
-	idxs = (leafs + icap - 1) / icap;
-	do {
-		num += idxs;
-		idxs = (idxs + icap - 1) / icap;
-	} while (idxs > rcap);
+	if (ei->i_da_metadata_calc_len &&
+	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+		if ((ei->i_da_metadata_calc_len % idxs) == 0)
+			num++;
+		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+			num++;
+		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+			num++;
+			ei->i_da_metadata_calc_len = 0;
+		} else
+			ei->i_da_metadata_calc_len++;
+		ei->i_da_metadata_calc_last_lblock++;
+		return num;
+	}
 
-	return num;
+	/*
+	 * In the worst case we need a new set of index blocks at
+	 * every level of the inode's extent tree.
+	 */
+	ei->i_da_metadata_calc_len = 1;
+	ei->i_da_metadata_calc_last_lblock = lblock;
+	return ext_depth(inode) + 1;
 }
 
 static int
@@ -425,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
 	return 0;
 
 corrupted:
-	ext4_error(inode->i_sb, function,
+	__ext4_error(inode->i_sb, function,
 			"bad header/extent in inode #%lu: %s - magic %x, "
 			"entries %u, max %u(%u), depth %u(%u)",
 			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -688,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 		}
 		eh = ext_block_hdr(bh);
 		ppos++;
-		BUG_ON(ppos > depth);
+		if (unlikely(ppos > depth)) {
+			put_bh(bh);
+			EXT4_ERROR_INODE(inode,
+					 "ppos %d > depth %d", ppos, depth);
+			goto err;
+		}
 		path[ppos].p_bh = bh;
 		path[ppos].p_hdr = eh;
 		i--;
@@ -734,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	if (err)
 		return err;
 
-	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
 	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
 	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
 		/* insert after */
@@ -764,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	ext4_idx_store_pblock(ix, ptr);
 	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
-	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
-			     > le16_to_cpu(curp->p_hdr->eh_max));
-	BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+			     > le16_to_cpu(curp->p_hdr->eh_max))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
+	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
+		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
+		return -EIO;
+	}
 
 	err = ext4_ext_dirty(handle, inode, curp);
 	ext4_std_error(inode->i_sb, err);
@@ -804,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
 	/* if current leaf will be split, then we should use
 	 * border from split point */
-	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
+		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
+		return -EIO;
+	}
 	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		border = path[depth].p_ext[1].ee_block;
 		ext_debug("leaf will be split."
@@ -845,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
 	/* initialize new leaf */
 	newblock = ablocks[--a];
-	BUG_ON(newblock == 0);
+	if (unlikely(newblock == 0)) {
+		EXT4_ERROR_INODE(inode, "newblock == 0!");
+		err = -EIO;
+		goto cleanup;
+	}
 	bh = sb_getblk(inode->i_sb, newblock);
 	if (!bh) {
 		err = -EIO;
@@ -865,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	ex = EXT_FIRST_EXTENT(neh);
 
 	/* move remainder of path[depth] to the new leaf */
-	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+	if (unlikely(path[depth].p_hdr->eh_entries !=
+		     path[depth].p_hdr->eh_max)) {
+		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
+				 path[depth].p_hdr->eh_entries,
+				 path[depth].p_hdr->eh_max);
+		err = -EIO;
+		goto cleanup;
+	}
 	/* start copy from next extent */
 	/* TODO: we could do it by single memmove */
 	m = 0;
@@ -912,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
 	/* create intermediate indexes */
 	k = depth - at - 1;
-	BUG_ON(k < 0);
+	if (unlikely(k < 0)) {
+		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
+		err = -EIO;
+		goto cleanup;
+	}
 	if (k)
 		ext_debug("create %d intermediate indices\n", k);
 	/* insert new index into current index block */
@@ -949,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
 		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
 				EXT_MAX_INDEX(path[i].p_hdr));
-		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
-				EXT_LAST_INDEX(path[i].p_hdr));
+		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
+					EXT_LAST_INDEX(path[i].p_hdr))) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+					 le32_to_cpu(path[i].p_ext->ee_block));
+			err = -EIO;
+			goto cleanup;
+		}
 		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
 			ext_debug("%d: move %d:%llu in new index %llu\n", i,
 					le32_to_cpu(path[i].p_idx->ei_block),
@@ -1188,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
 	struct ext4_extent *ex;
 	int depth, ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
 	depth = path->p_depth;
 	*phys = 0;
 
@@ -1202,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_len(ex);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
+					 *logical, le32_to_cpu(ex->ee_block));
+			return -EIO;
+		}
 		while (--depth >= 0) {
 			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+					"ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
+					ix != NULL ? ix->ei_block : 0,
+					EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
+					EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+					depth);
+				return -EIO;
+			}
 		}
 		return 0;
 	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
 	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
 	*phys = ext_pblock(ex) + ee_len - 1;
@@ -1236,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
 	int depth;	/* Note, NOT eh_depth; depth from top of tree */
 	int ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
 	depth = path->p_depth;
 	*phys = 0;
 
@@ -1250,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_len(ex);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "first_extent(path[%d].p_hdr) != ex",
+					 depth);
+			return -EIO;
+		}
 		while (--depth >= 0) {
 			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+						 "ix != EXT_FIRST_INDEX *logical %d!",
+						 *logical);
+				return -EIO;
+			}
 		}
 		*logical = le32_to_cpu(ex->ee_block);
 		*phys = ext_pblock(ex);
 		return 0;
 	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
 	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
 		/* next allocated block in this leaf */
@@ -1399,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
-	BUG_ON(ex == NULL);
-	BUG_ON(eh == NULL);
+
+	if (unlikely(ex == NULL || eh == NULL)) {
+		EXT4_ERROR_INODE(inode,
+				 "ex %p == NULL or eh %p == NULL", ex, eh);
+		return -EIO;
+	}
 
 	if (depth == 0) {
 		/* there is no tree at all */
@@ -1523,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
 			merge_done = 1;
 			WARN_ON(eh->eh_entries == 0);
 			if (!eh->eh_entries)
-				ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
-				   "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+				ext4_error(inode->i_sb,
+					   "inode#%lu, eh->eh_entries = 0!",
+					   inode->i_ino);
 		}
 
 	return merge_done;
@@ -1597,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
 
-	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
+	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+		return -EIO;
+	}
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	BUG_ON(path[depth].p_hdr == NULL);
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
 
 	/* try to insert block into found extent and return */
-	if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
 		&& ext4_can_extents_be_merged(inode, ex, newext)) {
 		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
 			  ext4_ext_is_uninitialized(newext),
@@ -1724,7 +1830,7 @@ has_space:
 
 merge:
 	/* try to merge extents to the right */
-	if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
 		ext4_ext_try_to_merge(inode, path, nearex);
 
 	/* try to merge extents to the left */
@@ -1772,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 		}
 
 		depth = ext_depth(inode);
-		BUG_ON(path[depth].p_hdr == NULL);
+		if (unlikely(path[depth].p_hdr == NULL)) {
+			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+			err = -EIO;
+			break;
+		}
 		ex = path[depth].p_ext;
 		next = ext4_ext_next_allocated_block(path);
 
@@ -1823,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
 
-		BUG_ON(cbex.ec_len == 0);
+		if (unlikely(cbex.ec_len == 0)) {
+			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+			err = -EIO;
+			break;
+		}
 		err = func(inode, path, &cbex, ex, cbdata);
 		ext4_ext_drop_refs(path);
 
@@ -1937,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 
 	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
 			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
-	if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+	if (in_range(block, cex->ec_block, cex->ec_len)) {
 		ex->ee_block = cpu_to_le32(cex->ec_block);
 		ext4_ext_store_pblock(ex, cex->ec_start);
 		ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1966,7 +2080,10 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	/* free index block */
 	path--;
 	leaf = idx_pblock(path->p_idx);
-	BUG_ON(path->p_hdr->eh_entries == 0);
+	if (unlikely(path->p_hdr->eh_entries == 0)) {
+		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+		return -EIO;
+	}
 	err = ext4_ext_get_access(handle, inode, path);
 	if (err)
 		return err;
@@ -2104,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	if (!path[depth].p_hdr)
 		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
 	eh = path[depth].p_hdr;
-	BUG_ON(eh == NULL);
-
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
 	/* find where to start removing */
 	ex = EXT_LAST_EXTENT(eh);
 
@@ -2968,7 +3087,7 @@ fix_extent_len:
 	ext4_ext_dirty(handle, inode, path + depth);
 	return err;
 }
-static int ext4_convert_unwritten_extents_dio(handle_t *handle,
+static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 					      struct inode *inode,
 					      struct ext4_ext_path *path)
 {
@@ -3023,6 +3142,14 @@ out:
 	return err;
 }
 
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+			sector_t block, int count)
+{
+	int i;
+	for (i = 0; i < count; i++)
+		unmap_underlying_metadata(bdev, block + i);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 					ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3040,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
-	/* DIO get_block() before submit the IO, split the extent */
-	if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+	/* get_block() before submit the IO, split the extent */
+	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle,
 						inode, path, iblock,
 						max_blocks, flags);
@@ -3051,14 +3178,16 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		 * completed
 		 */
 		if (io)
-			io->flag = DIO_AIO_UNWRITTEN;
+			io->flag = EXT4_IO_UNWRITTEN;
 		else
-			EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
+			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
 		goto out;
 	}
-	/* async DIO end_io complete, convert the filled extent to written */
-	if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
-		ret = ext4_convert_unwritten_extents_dio(handle, inode,
+	/* IO end_io complete, convert the filled extent to written */
+	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+		ret = ext4_convert_unwritten_extents_endio(handle, inode,
 							path);
 		if (ret >= 0)
 			ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3098,6 +3227,30 @@ out:
 	} else
 		allocated = ret;
 	set_buffer_new(bh_result);
+	/*
+	 * if we allocated more blocks than requested
+	 * we need to make sure we unmap the extra block
+	 * allocated. The actual needed block will get
+	 * unmapped later when we find the buffer_head marked
+	 * new.
+	 */
+	if (allocated > max_blocks) {
+		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+					newblock + max_blocks,
+					allocated - max_blocks);
+		allocated = max_blocks;
+	}
+
+	/*
+	 * If we have done fallocate with the offset that is already
+	 * delayed allocated, we would have block reservation
+	 * and quota reservation done in the delayed write path.
+	 * But fallocate would have already updated quota and block
+	 * count for this offset. So cancel these reservation
+	 */
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ext4_da_update_reserve_space(inode, allocated, 0);
+
 map_out:
 	set_buffer_mapped(bh_result);
 out1:
@@ -3138,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *last_ex;
 	ext4_fsblk_t newblock;
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
@@ -3190,7 +3343,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * this situation is possible, though, _during_ tree modification;
 	 * this is why assert can't be put in ext4_ext_find_extent()
 	 */
-	BUG_ON(path[depth].p_ext == NULL && depth != 0);
+	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+		EXT4_ERROR_INODE(inode, "bad extent address "
+				 "iblock: %d, depth: %d pblock %lld",
+				 iblock, depth, path[depth].p_block);
+		err = -EIO;
+		goto out2;
+	}
 	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
@@ -3205,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		 */
 		ee_len = ext4_ext_get_actual_len(ex);
 		/* if found extent covers block, simply return it */
-		if (iblock >= ee_block && iblock < ee_block + ee_len) {
+		if (in_range(iblock, ee_block, ee_len)) {
 			newblock = iblock - ee_block + ee_start;
 			/* number of remaining blocks in the extent */
 			allocated = ee_len - (iblock - ee_block);
@@ -3297,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
 		ext4_ext_mark_uninitialized(&newex);
 		/*
-		 * io_end structure was created for every async
-		 * direct IO write to the middle of the file.
-		 * To avoid unecessary convertion for every aio dio rewrite
-		 * to the mid of file, here we flag the IO that is really
-		 * need the convertion.
+		 * io_end structure was created for every IO write to an
+		 * uninitialized extent. To avoid unecessary conversion,
+		 * here we flag the IO that really needs the conversion.
 		 * For non asycn direct IO case, flag the inode state
 		 * that we need to perform convertion when IO is done.
 		 */
-		if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 			if (io)
-				io->flag = DIO_AIO_UNWRITTEN;
+				io->flag = EXT4_IO_UNWRITTEN;
 			else
-				EXT4_I(inode)->i_state |=
-					EXT4_STATE_DIO_UNWRITTEN;;
+				ext4_set_inode_state(inode,
+						     EXT4_STATE_DIO_UNWRITTEN);
 		}
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
+	}
+
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+		if (unlikely(!eh->eh_entries)) {
+			EXT4_ERROR_INODE(inode,
+					 "eh->eh_entries == 0 ee_block %d",
+					 ex->ee_block);
+			err = -EIO;
+			goto out2;
+		}
+		last_ex = EXT_LAST_EXTENT(eh);
+		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+		    + ext4_ext_get_actual_len(last_ex))
+			EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
 	}
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
@@ -3327,9 +3500,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* previous routine could use block we allocated */
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
+	if (allocated > max_blocks)
+		allocated = max_blocks;
 	set_buffer_new(bh_result);
 
 	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ext4_da_update_reserve_space(inode, allocated, 1);
+
+	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
 	 * when it is _not_ an uninitialized extent.
 	 */
@@ -3437,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
 		i_size_write(inode, new_size);
 		if (new_size > EXT4_I(inode)->i_disksize)
 			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if (new_size > i_size_read(inode))
+			EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
 	}
 
 }
@@ -3541,7 +3730,7 @@ retry:
  * Returns 0 on success.
  */
 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-				    loff_t len)
+				    ssize_t len)
 {
 	handle_t *handle;
 	ext4_lblk_t block;
@@ -3573,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 		map_bh.b_state = 0;
 		ret = ext4_get_blocks(handle, inode, block,
 				      max_blocks, &map_bh,
-				      EXT4_GET_BLOCKS_DIO_CONVERT_EXT);
+				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 		if (ret <= 0) {
 			WARN_ON(ret <= 0);
 			printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3677,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 	int error = 0;
 
 	/* in-inode? */
-	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
 		struct ext4_iloc iloc;
 		int offset;	/* offset of xattr in inode */
 
@@ -3690,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 		physical += offset;
 		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
 		flags |= FIEMAP_EXTENT_DATA_INLINE;
+		brelse(iloc.bh);
 	} else { /* external block */
 		physical = EXT4_I(inode)->i_file_acl << blockbits;
 		length = inode->i_sb->s_blocksize;
@@ -3705,7 +3895,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
 	ext4_lblk_t start_blk;
-	ext4_lblk_t len_blks;
 	int error = 0;
 
 	/* fallback to generic here if not in extents fmt */
@@ -3719,8 +3908,14 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
 		error = ext4_xattr_fiemap(inode, fieinfo);
 	} else {
+		ext4_lblk_t len_blks;
+		__u64 last_blk;
+
 		start_blk = start >> inode->i_sb->s_blocksize_bits;
-		len_blks = len >> inode->i_sb->s_blocksize_bits;
+		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+		if (last_blk >= EXT_MAX_BLOCK)
+			last_blk = EXT_MAX_BLOCK-1;
+		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
 		/*
 		 * Walk the extent tree gathering extent information.