about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Mark Fasheh <mark.fasheh@oracle.com> 2007-03-07 19:46:57 -0500
committer Mark Fasheh <mark.fasheh@oracle.com> 2007-04-26 18:02:37 -0400
commit e48edee2d8eab812f31f0ff62c6ba635ca2e1e21 (patch)
tree 6afb9fe59a06ce621cb11d570e432e7d739376ff /fs
parent 6af67d8205cf65fbaaa743edc7ebb46e486e34ff (diff)
ocfs2: make room for unwritten extents flag
Due to the size of our group bitmaps, we'll never have a leaf node extent record with more than 16 bits' worth of clusters. Split e_clusters up so that leaf nodes can get a flags field where we can mark unwritten extents. Interior nodes, whose length references all the child nodes beneath them, can't split their e_clusters field, so we use a union to preserve sizing there.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/alloc.c 155
-rw-r--r-- fs/ocfs2/alloc.h 19
-rw-r--r-- fs/ocfs2/extent_map.c 19
-rw-r--r-- fs/ocfs2/file.c 6
-rw-r--r-- fs/ocfs2/journal.h 2
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 19
6 files changed, 151 insertions, 69 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 027cf5d05ffb..0eab0d328289 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -218,20 +218,32 @@ enum ocfs2_contig_type {
218 CONTIG_RIGHT 218 CONTIG_RIGHT
219}; 219};
220 220
221
222/*
223 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
224 * ocfs2_extent_contig only work properly against leaf nodes!
225 */
221static int ocfs2_block_extent_contig(struct super_block *sb, 226static int ocfs2_block_extent_contig(struct super_block *sb,
222 struct ocfs2_extent_rec *ext, 227 struct ocfs2_extent_rec *ext,
223 u64 blkno) 228 u64 blkno)
224{ 229{
225 return blkno == (le64_to_cpu(ext->e_blkno) + 230 u64 blk_end = le64_to_cpu(ext->e_blkno);
226 ocfs2_clusters_to_blocks(sb, 231
227 le32_to_cpu(ext->e_clusters))); 232 blk_end += ocfs2_clusters_to_blocks(sb,
233 le16_to_cpu(ext->e_leaf_clusters));
234
235 return blkno == blk_end;
228} 236}
229 237
230static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, 238static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
231 struct ocfs2_extent_rec *right) 239 struct ocfs2_extent_rec *right)
232{ 240{
233 return (le32_to_cpu(left->e_cpos) + le32_to_cpu(left->e_clusters) == 241 u32 left_range;
234 le32_to_cpu(right->e_cpos)); 242
243 left_range = le32_to_cpu(left->e_cpos) +
244 le16_to_cpu(left->e_leaf_clusters);
245
246 return (left_range == le32_to_cpu(right->e_cpos));
235} 247}
236 248
237static enum ocfs2_contig_type 249static enum ocfs2_contig_type
@@ -430,7 +442,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
430 i = le16_to_cpu(el->l_next_free_rec) - 1; 442 i = le16_to_cpu(el->l_next_free_rec) - 1;
431 443
432 return le32_to_cpu(el->l_recs[i].e_cpos) + 444 return le32_to_cpu(el->l_recs[i].e_cpos) +
433 le32_to_cpu(el->l_recs[i].e_clusters); 445 ocfs2_rec_clusters(el, &el->l_recs[i]);
434} 446}
435 447
436/* 448/*
@@ -442,7 +454,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
442 * for the new last extent block. 454 * for the new last extent block.
443 * 455 *
444 * the new branch will be 'empty' in the sense that every block will 456 * the new branch will be 'empty' in the sense that every block will
445 * contain a single record with e_clusters == 0. 457 * contain a single record with cluster count == 0.
446 */ 458 */
447static int ocfs2_add_branch(struct ocfs2_super *osb, 459static int ocfs2_add_branch(struct ocfs2_super *osb,
448 handle_t *handle, 460 handle_t *handle,
@@ -532,7 +544,12 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
532 */ 544 */
533 eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos); 545 eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos);
534 eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 546 eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
535 eb_el->l_recs[0].e_clusters = cpu_to_le32(0); 547 /*
548 * eb_el isn't always an interior node, but even leaf
549 * nodes want a zero'd flags and reserved field so
550 * this gets the whole 32 bits regardless of use.
551 */
552 eb_el->l_recs[0].e_int_clusters = cpu_to_le32(0);
536 if (!eb_el->l_tree_depth) 553 if (!eb_el->l_tree_depth)
537 new_last_eb_blk = le64_to_cpu(eb->h_blkno); 554 new_last_eb_blk = le64_to_cpu(eb->h_blkno);
538 555
@@ -577,7 +594,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
577 i = le16_to_cpu(el->l_next_free_rec); 594 i = le16_to_cpu(el->l_next_free_rec);
578 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 595 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
579 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 596 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
580 el->l_recs[i].e_clusters = 0; 597 el->l_recs[i].e_int_clusters = 0;
581 le16_add_cpu(&el->l_next_free_rec, 1); 598 le16_add_cpu(&el->l_next_free_rec, 1);
582 599
583 /* fe needs a new last extent block pointer, as does the 600 /* fe needs a new last extent block pointer, as does the
@@ -662,11 +679,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
662 /* copy the fe data into the new extent block */ 679 /* copy the fe data into the new extent block */
663 eb_el->l_tree_depth = fe_el->l_tree_depth; 680 eb_el->l_tree_depth = fe_el->l_tree_depth;
664 eb_el->l_next_free_rec = fe_el->l_next_free_rec; 681 eb_el->l_next_free_rec = fe_el->l_next_free_rec;
665 for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 682 for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
666 eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; 683 eb_el->l_recs[i] = fe_el->l_recs[i];
667 eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
668 eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
669 }
670 684
671 status = ocfs2_journal_dirty(handle, new_eb_bh); 685 status = ocfs2_journal_dirty(handle, new_eb_bh);
672 if (status < 0) { 686 if (status < 0) {
@@ -687,12 +701,9 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
687 le16_add_cpu(&fe_el->l_tree_depth, 1); 701 le16_add_cpu(&fe_el->l_tree_depth, 1);
688 fe_el->l_recs[0].e_cpos = 0; 702 fe_el->l_recs[0].e_cpos = 0;
689 fe_el->l_recs[0].e_blkno = eb->h_blkno; 703 fe_el->l_recs[0].e_blkno = eb->h_blkno;
690 fe_el->l_recs[0].e_clusters = cpu_to_le32(new_clusters); 704 fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
691 for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 705 for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
692 fe_el->l_recs[i].e_cpos = 0; 706 memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
693 fe_el->l_recs[i].e_clusters = 0;
694 fe_el->l_recs[i].e_blkno = 0;
695 }
696 fe_el->l_next_free_rec = cpu_to_le16(1); 707 fe_el->l_next_free_rec = cpu_to_le16(1);
697 708
698 /* If this is our 1st tree depth shift, then last_eb_blk 709 /* If this is our 1st tree depth shift, then last_eb_blk
@@ -817,9 +828,13 @@ bail:
817 return status; 828 return status;
818} 829}
819 830
831/*
832 * This is only valid for leaf nodes, which are the only ones that can
833 * have empty extents anyway.
834 */
820static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) 835static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
821{ 836{
822 return !rec->e_clusters; 837 return !rec->e_leaf_clusters;
823} 838}
824 839
825/* 840/*
@@ -930,6 +945,8 @@ static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
930{ 945{
931 int next_free = le16_to_cpu(el->l_next_free_rec); 946 int next_free = le16_to_cpu(el->l_next_free_rec);
932 947
948 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
949
933 if (next_free == 0) 950 if (next_free == 0)
934 goto set_and_inc; 951 goto set_and_inc;
935 952
@@ -1034,7 +1051,7 @@ static int __ocfs2_find_path(struct inode *inode,
1034 * rightmost record. 1051 * rightmost record.
1035 */ 1052 */
1036 range = le32_to_cpu(rec->e_cpos) + 1053 range = le32_to_cpu(rec->e_cpos) +
1037 le32_to_cpu(rec->e_clusters); 1054 ocfs2_rec_clusters(el, rec);
1038 if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) 1055 if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
1039 break; 1056 break;
1040 } 1057 }
@@ -1195,21 +1212,21 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1195 */ 1212 */
1196 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1213 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1197 left_clusters -= le32_to_cpu(left_rec->e_cpos); 1214 left_clusters -= le32_to_cpu(left_rec->e_cpos);
1198 left_rec->e_clusters = cpu_to_le32(left_clusters); 1215 left_rec->e_int_clusters = cpu_to_le32(left_clusters);
1199 1216
1200 /* 1217 /*
1201 * Calculate the rightmost cluster count boundary before 1218 * Calculate the rightmost cluster count boundary before
1202 * moving cpos - we will need to adjust e_clusters after 1219 * moving cpos - we will need to adjust clusters after
1203 * updating e_cpos to keep the same highest cluster count. 1220 * updating e_cpos to keep the same highest cluster count.
1204 */ 1221 */
1205 right_end = le32_to_cpu(right_rec->e_cpos); 1222 right_end = le32_to_cpu(right_rec->e_cpos);
1206 right_end += le32_to_cpu(right_rec->e_clusters); 1223 right_end += le32_to_cpu(right_rec->e_int_clusters);
1207 1224
1208 right_rec->e_cpos = left_rec->e_cpos; 1225 right_rec->e_cpos = left_rec->e_cpos;
1209 le32_add_cpu(&right_rec->e_cpos, left_clusters); 1226 le32_add_cpu(&right_rec->e_cpos, left_clusters);
1210 1227
1211 right_end -= le32_to_cpu(right_rec->e_cpos); 1228 right_end -= le32_to_cpu(right_rec->e_cpos);
1212 right_rec->e_clusters = cpu_to_le32(right_end); 1229 right_rec->e_int_clusters = cpu_to_le32(right_end);
1213} 1230}
1214 1231
1215/* 1232/*
@@ -1452,6 +1469,8 @@ static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
1452 u64 blkno; 1469 u64 blkno;
1453 struct ocfs2_extent_list *el; 1470 struct ocfs2_extent_list *el;
1454 1471
1472 BUG_ON(path->p_tree_depth == 0);
1473
1455 *cpos = 0; 1474 *cpos = 0;
1456 1475
1457 blkno = path_leaf_bh(path)->b_blocknr; 1476 blkno = path_leaf_bh(path)->b_blocknr;
@@ -1486,7 +1505,9 @@ static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
1486 } 1505 }
1487 1506
1488 *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos); 1507 *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos);
1489 *cpos = *cpos + le32_to_cpu(el->l_recs[j - 1].e_clusters) - 1; 1508 *cpos = *cpos + ocfs2_rec_clusters(el,
1509 &el->l_recs[j - 1]);
1510 *cpos = *cpos - 1;
1490 goto out; 1511 goto out;
1491 } 1512 }
1492 } 1513 }
@@ -1715,7 +1736,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1715 unsigned int range; 1736 unsigned int range;
1716 struct ocfs2_extent_rec *rec; 1737 struct ocfs2_extent_rec *rec;
1717 1738
1718 BUG_ON(el->l_tree_depth); 1739 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
1719 1740
1720 /* 1741 /*
1721 * Contiguous insert - either left or right. 1742 * Contiguous insert - either left or right.
@@ -1726,8 +1747,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1726 rec->e_blkno = insert_rec->e_blkno; 1747 rec->e_blkno = insert_rec->e_blkno;
1727 rec->e_cpos = insert_rec->e_cpos; 1748 rec->e_cpos = insert_rec->e_cpos;
1728 } 1749 }
1729 le32_add_cpu(&rec->e_clusters, 1750 le16_add_cpu(&rec->e_leaf_clusters,
1730 le32_to_cpu(insert_rec->e_clusters)); 1751 le16_to_cpu(insert_rec->e_leaf_clusters));
1731 return; 1752 return;
1732 } 1753 }
1733 1754
@@ -1748,7 +1769,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1748 if (insert->ins_appending == APPEND_TAIL) { 1769 if (insert->ins_appending == APPEND_TAIL) {
1749 i = le16_to_cpu(el->l_next_free_rec) - 1; 1770 i = le16_to_cpu(el->l_next_free_rec) - 1;
1750 rec = &el->l_recs[i]; 1771 rec = &el->l_recs[i];
1751 range = le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters); 1772 range = le32_to_cpu(rec->e_cpos)
1773 + le16_to_cpu(rec->e_leaf_clusters);
1752 BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); 1774 BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
1753 1775
1754 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 1776 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
@@ -1761,9 +1783,9 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1761 le16_to_cpu(el->l_count), 1783 le16_to_cpu(el->l_count),
1762 le16_to_cpu(el->l_next_free_rec), 1784 le16_to_cpu(el->l_next_free_rec),
1763 le32_to_cpu(el->l_recs[i].e_cpos), 1785 le32_to_cpu(el->l_recs[i].e_cpos),
1764 le32_to_cpu(el->l_recs[i].e_clusters), 1786 le16_to_cpu(el->l_recs[i].e_leaf_clusters),
1765 le32_to_cpu(insert_rec->e_cpos), 1787 le32_to_cpu(insert_rec->e_cpos),
1766 le32_to_cpu(insert_rec->e_clusters)); 1788 le16_to_cpu(insert_rec->e_leaf_clusters));
1767 i++; 1789 i++;
1768 el->l_recs[i] = *insert_rec; 1790 el->l_recs[i] = *insert_rec;
1769 le16_add_cpu(&el->l_next_free_rec, 1); 1791 le16_add_cpu(&el->l_next_free_rec, 1);
@@ -1806,6 +1828,12 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
1806 *ret_left_path = NULL; 1828 *ret_left_path = NULL;
1807 1829
1808 /* 1830 /*
1831 * This shouldn't happen for non-trees. The extent rec cluster
1832 * count manipulation below only works for interior nodes.
1833 */
1834 BUG_ON(right_path->p_tree_depth == 0);
1835
1836 /*
1809 * If our appending insert is at the leftmost edge of a leaf, 1837 * If our appending insert is at the leftmost edge of a leaf,
1810 * then we might need to update the rightmost records of the 1838 * then we might need to update the rightmost records of the
1811 * neighboring path. 1839 * neighboring path.
@@ -1863,6 +1891,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
1863 bh = path_root_bh(right_path); 1891 bh = path_root_bh(right_path);
1864 i = 0; 1892 i = 0;
1865 while (1) { 1893 while (1) {
1894 struct ocfs2_extent_rec *rec;
1895
1866 next_free = le16_to_cpu(el->l_next_free_rec); 1896 next_free = le16_to_cpu(el->l_next_free_rec);
1867 if (next_free == 0) { 1897 if (next_free == 0) {
1868 ocfs2_error(inode->i_sb, 1898 ocfs2_error(inode->i_sb,
@@ -1872,16 +1902,19 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
1872 goto out; 1902 goto out;
1873 } 1903 }
1874 1904
1875 el->l_recs[next_free - 1].e_clusters = insert_rec->e_cpos; 1905 rec = &el->l_recs[next_free - 1];
1876 le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1906
1877 le32_to_cpu(insert_rec->e_clusters)); 1907 rec->e_int_clusters = insert_rec->e_cpos;
1878 le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1908 le32_add_cpu(&rec->e_int_clusters,
1879 -le32_to_cpu(el->l_recs[next_free - 1].e_cpos)); 1909 le16_to_cpu(insert_rec->e_leaf_clusters));
1910 le32_add_cpu(&rec->e_int_clusters,
1911 -le32_to_cpu(rec->e_cpos));
1880 1912
1881 ret = ocfs2_journal_dirty(handle, bh); 1913 ret = ocfs2_journal_dirty(handle, bh);
1882 if (ret) 1914 if (ret)
1883 mlog_errno(ret); 1915 mlog_errno(ret);
1884 1916
1917 /* Don't touch the leaf node */
1885 if (++i >= right_path->p_tree_depth) 1918 if (++i >= right_path->p_tree_depth)
1886 break; 1919 break;
1887 1920
@@ -2068,7 +2101,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
2068 2101
2069out_update_clusters: 2102out_update_clusters:
2070 ocfs2_update_dinode_clusters(inode, di, 2103 ocfs2_update_dinode_clusters(inode, di,
2071 le32_to_cpu(insert_rec->e_clusters)); 2104 le16_to_cpu(insert_rec->e_leaf_clusters));
2072 2105
2073 ret = ocfs2_journal_dirty(handle, di_bh); 2106 ret = ocfs2_journal_dirty(handle, di_bh);
2074 if (ret) 2107 if (ret)
@@ -2089,6 +2122,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
2089 int i; 2122 int i;
2090 enum ocfs2_contig_type contig_type = CONTIG_NONE; 2123 enum ocfs2_contig_type contig_type = CONTIG_NONE;
2091 2124
2125 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
2126
2092 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 2127 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
2093 contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 2128 contig_type = ocfs2_extent_contig(inode, &el->l_recs[i],
2094 insert_rec); 2129 insert_rec);
@@ -2120,7 +2155,7 @@ static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
2120 2155
2121 insert->ins_appending = APPEND_NONE; 2156 insert->ins_appending = APPEND_NONE;
2122 2157
2123 BUG_ON(el->l_tree_depth); 2158 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
2124 2159
2125 if (!el->l_next_free_rec) 2160 if (!el->l_next_free_rec)
2126 goto set_tail_append; 2161 goto set_tail_append;
@@ -2134,7 +2169,8 @@ static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
2134 i = le16_to_cpu(el->l_next_free_rec) - 1; 2169 i = le16_to_cpu(el->l_next_free_rec) - 1;
2135 rec = &el->l_recs[i]; 2170 rec = &el->l_recs[i];
2136 2171
2137 if (cpos >= (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters))) 2172 if (cpos >=
2173 (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)))
2138 goto set_tail_append; 2174 goto set_tail_append;
2139 2175
2140 return; 2176 return;
@@ -2242,7 +2278,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
2242 * The insert code isn't quite ready to deal with all cases of 2278 * The insert code isn't quite ready to deal with all cases of
2243 * left contiguousness. Specifically, if it's an insert into 2279 * left contiguousness. Specifically, if it's an insert into
2244 * the 1st record in a leaf, it will require the adjustment of 2280 * the 1st record in a leaf, it will require the adjustment of
2245 * e_clusters on the last record of the path directly to it's 2281 * cluster count on the last record of the path directly to it's
2246 * left. For now, just catch that case and fool the layers 2282 * left. For now, just catch that case and fool the layers
2247 * above us. This works just fine for tree_depth == 0, which 2283 * above us. This works just fine for tree_depth == 0, which
2248 * is why we allow that above. 2284 * is why we allow that above.
@@ -2310,9 +2346,10 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
2310 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, 2346 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
2311 OCFS2_I(inode)->ip_clusters); 2347 OCFS2_I(inode)->ip_clusters);
2312 2348
2349 memset(&rec, 0, sizeof(rec));
2313 rec.e_cpos = cpu_to_le32(cpos); 2350 rec.e_cpos = cpu_to_le32(cpos);
2314 rec.e_blkno = cpu_to_le64(start_blk); 2351 rec.e_blkno = cpu_to_le64(start_blk);
2315 rec.e_clusters = cpu_to_le32(new_clusters); 2352 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
2316 2353
2317 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 2354 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
2318 &insert); 2355 &insert);
@@ -2981,7 +3018,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
2981 * Check it we'll only be trimming off the end of this 3018 * Check it we'll only be trimming off the end of this
2982 * cluster. 3019 * cluster.
2983 */ 3020 */
2984 if (le16_to_cpu(rec->e_clusters) > clusters_to_del) 3021 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
2985 goto out; 3022 goto out;
2986 } 3023 }
2987 3024
@@ -3061,11 +3098,11 @@ find_tail_record:
3061 3098
3062 mlog(0, "Extent list before: record %d: (%u, %u, %llu), " 3099 mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
3063 "next = %u\n", i, le32_to_cpu(rec->e_cpos), 3100 "next = %u\n", i, le32_to_cpu(rec->e_cpos),
3064 le32_to_cpu(rec->e_clusters), 3101 ocfs2_rec_clusters(el, rec),
3065 (unsigned long long)le64_to_cpu(rec->e_blkno), 3102 (unsigned long long)le64_to_cpu(rec->e_blkno),
3066 le16_to_cpu(el->l_next_free_rec)); 3103 le16_to_cpu(el->l_next_free_rec));
3067 3104
3068 BUG_ON(le32_to_cpu(rec->e_clusters) < clusters_to_del); 3105 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
3069 3106
3070 if (le16_to_cpu(el->l_tree_depth) == 0) { 3107 if (le16_to_cpu(el->l_tree_depth) == 0) {
3071 /* 3108 /*
@@ -3107,13 +3144,13 @@ find_tail_record:
3107 goto find_tail_record; 3144 goto find_tail_record;
3108 } 3145 }
3109 3146
3110 le32_add_cpu(&rec->e_clusters, -clusters_to_del); 3147 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
3111 3148
3112 /* 3149 /*
3113 * We'll use "new_edge" on our way back up the 3150 * We'll use "new_edge" on our way back up the
3114 * tree to know what our rightmost cpos is. 3151 * tree to know what our rightmost cpos is.
3115 */ 3152 */
3116 new_edge = le32_to_cpu(rec->e_clusters); 3153 new_edge = le16_to_cpu(rec->e_leaf_clusters);
3117 new_edge += le32_to_cpu(rec->e_cpos); 3154 new_edge += le32_to_cpu(rec->e_cpos);
3118 3155
3119 /* 3156 /*
@@ -3121,12 +3158,12 @@ find_tail_record:
3121 */ 3158 */
3122 *delete_start = le64_to_cpu(rec->e_blkno) 3159 *delete_start = le64_to_cpu(rec->e_blkno)
3123 + ocfs2_clusters_to_blocks(inode->i_sb, 3160 + ocfs2_clusters_to_blocks(inode->i_sb,
3124 le32_to_cpu(rec->e_clusters)); 3161 le16_to_cpu(rec->e_leaf_clusters));
3125 3162
3126 /* 3163 /*
3127 * If it's now empty, remove this record. 3164 * If it's now empty, remove this record.
3128 */ 3165 */
3129 if (le32_to_cpu(rec->e_clusters) == 0) { 3166 if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
3130 memset(rec, 0, 3167 memset(rec, 0,
3131 sizeof(struct ocfs2_extent_rec)); 3168 sizeof(struct ocfs2_extent_rec));
3132 le16_add_cpu(&el->l_next_free_rec, -1); 3169 le16_add_cpu(&el->l_next_free_rec, -1);
@@ -3152,15 +3189,15 @@ find_tail_record:
3152 if (new_edge == 0) 3189 if (new_edge == 0)
3153 goto delete; 3190 goto delete;
3154 3191
3155 rec->e_clusters = cpu_to_le32(new_edge); 3192 rec->e_int_clusters = cpu_to_le32(new_edge);
3156 le32_add_cpu(&rec->e_clusters, 3193 le32_add_cpu(&rec->e_int_clusters,
3157 -le32_to_cpu(rec->e_cpos)); 3194 -le32_to_cpu(rec->e_cpos));
3158 3195
3159 /* 3196 /*
3160 * A deleted child record should have been 3197 * A deleted child record should have been
3161 * caught above. 3198 * caught above.
3162 */ 3199 */
3163 BUG_ON(le32_to_cpu(rec->e_clusters) == 0); 3200 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
3164 } 3201 }
3165 3202
3166delete: 3203delete:
@@ -3173,7 +3210,7 @@ delete:
3173 mlog(0, "extent list container %llu, after: record %d: " 3210 mlog(0, "extent list container %llu, after: record %d: "
3174 "(%u, %u, %llu), next = %u.\n", 3211 "(%u, %u, %llu), next = %u.\n",
3175 (unsigned long long)bh->b_blocknr, i, 3212 (unsigned long long)bh->b_blocknr, i,
3176 le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), 3213 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
3177 (unsigned long long)le64_to_cpu(rec->e_blkno), 3214 (unsigned long long)le64_to_cpu(rec->e_blkno),
3178 le16_to_cpu(el->l_next_free_rec)); 3215 le16_to_cpu(el->l_next_free_rec));
3179 3216
@@ -3195,7 +3232,7 @@ delete:
3195 3232
3196 ocfs2_remove_from_cache(inode, bh); 3233 ocfs2_remove_from_cache(inode, bh);
3197 3234
3198 BUG_ON(le32_to_cpu(el->l_recs[0].e_clusters)); 3235 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
3199 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 3236 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
3200 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); 3237 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
3201 3238
@@ -3283,7 +3320,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
3283 * Lower levels depend on this never happening, but it's best 3320 * Lower levels depend on this never happening, but it's best
3284 * to check it up here before changing the tree. 3321 * to check it up here before changing the tree.
3285 */ 3322 */
3286 if (el->l_tree_depth && ocfs2_is_empty_extent(&el->l_recs[0])) { 3323 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
3287 ocfs2_error(inode->i_sb, 3324 ocfs2_error(inode->i_sb,
3288 "Inode %lu has an empty extent record, depth %u\n", 3325 "Inode %lu has an empty extent record, depth %u\n",
3289 inode->i_ino, le16_to_cpu(el->l_tree_depth)); 3326 inode->i_ino, le16_to_cpu(el->l_tree_depth));
@@ -3644,13 +3681,13 @@ start:
3644 3681
3645 i = le16_to_cpu(el->l_next_free_rec) - 1; 3682 i = le16_to_cpu(el->l_next_free_rec) - 1;
3646 range = le32_to_cpu(el->l_recs[i].e_cpos) + 3683 range = le32_to_cpu(el->l_recs[i].e_cpos) +
3647 le32_to_cpu(el->l_recs[i].e_clusters); 3684 ocfs2_rec_clusters(el, &el->l_recs[i]);
3648 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 3685 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) {
3649 clusters_to_del = 0; 3686 clusters_to_del = 0;
3650 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 3687 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
3651 clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); 3688 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
3652 } else if (range > new_highest_cpos) { 3689 } else if (range > new_highest_cpos) {
3653 clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + 3690 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
3654 le32_to_cpu(el->l_recs[i].e_cpos)) - 3691 le32_to_cpu(el->l_recs[i].e_cpos)) -
3655 new_highest_cpos; 3692 new_highest_cpos;
3656 } else { 3693 } else {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3cb39cd5e478..fbcb5934a081 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -85,4 +85,23 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
85int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 85int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
86 u32 cpos, struct buffer_head **leaf_bh); 86 u32 cpos, struct buffer_head **leaf_bh);
87 87
88/*
89 * Helper function to look at the # of clusters in an extent record.
90 */
91static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
92 struct ocfs2_extent_rec *rec)
93{
94 /*
95 * Cluster count in extent records is slightly different
96 * between interior nodes and leaf nodes. This is to support
97 * unwritten extents which need a flags field in leaf node
98 * records, thus shrinking the available space for a clusters
99 * field.
100 */
101 if (el->l_tree_depth)
102 return le32_to_cpu(rec->e_int_clusters);
103 else
104 return le16_to_cpu(rec->e_leaf_clusters);
105}
106
88#endif /* OCFS2_ALLOC_H */ 107#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 937c2722b753..ea0ce41d4193 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -50,13 +50,15 @@ static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
50 int ret = -1; 50 int ret = -1;
51 int i; 51 int i;
52 struct ocfs2_extent_rec *rec; 52 struct ocfs2_extent_rec *rec;
53 u32 rec_end, rec_start; 53 u32 rec_end, rec_start, clusters;
54 54
55 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 55 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
56 rec = &el->l_recs[i]; 56 rec = &el->l_recs[i];
57 57
58 rec_start = le32_to_cpu(rec->e_cpos); 58 rec_start = le32_to_cpu(rec->e_cpos);
59 rec_end = rec_start + le32_to_cpu(rec->e_clusters); 59 clusters = ocfs2_rec_clusters(el, rec);
60
61 rec_end = rec_start + clusters;
60 62
61 if (v_cluster >= rec_start && v_cluster < rec_end) { 63 if (v_cluster >= rec_start && v_cluster < rec_end) {
62 ret = i; 64 ret = i;
@@ -98,6 +100,15 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
98 100
99 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 101 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
100 el = &eb->h_list; 102 el = &eb->h_list;
103
104 if (el->l_tree_depth) {
105 ocfs2_error(inode->i_sb,
106 "Inode %lu has non zero tree depth in "
107 "leaf block %llu\n", inode->i_ino,
108 (unsigned long long)eb_bh->b_blocknr);
109 ret = -EROFS;
110 goto out;
111 }
101 } 112 }
102 113
103 i = ocfs2_search_extent_list(el, v_cluster); 114 i = ocfs2_search_extent_list(el, v_cluster);
@@ -118,7 +129,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
118 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 129 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
119 "record (%u, %u, 0)", inode->i_ino, 130 "record (%u, %u, 0)", inode->i_ino,
120 le32_to_cpu(rec->e_cpos), 131 le32_to_cpu(rec->e_cpos),
121 le32_to_cpu(rec->e_clusters)); 132 ocfs2_rec_clusters(el, rec));
122 ret = -EROFS; 133 ret = -EROFS;
123 goto out; 134 goto out;
124 } 135 }
@@ -130,7 +141,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
130 *p_cluster = *p_cluster + coff; 141 *p_cluster = *p_cluster + coff;
131 142
132 if (num_clusters) 143 if (num_clusters)
133 *num_clusters = le32_to_cpu(rec->e_clusters) - coff; 144 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
134 } 145 }
135 146
136out: 147out:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f516619a3744..36176018b4b4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1127,7 +1127,6 @@ static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
1127 size_t count) 1127 size_t count)
1128{ 1128{
1129 int ret = 0; 1129 int ret = 0;
1130 unsigned int extent_flags;
1131 u32 cpos, clusters, extent_len, phys_cpos; 1130 u32 cpos, clusters, extent_len, phys_cpos;
1132 struct super_block *sb = inode->i_sb; 1131 struct super_block *sb = inode->i_sb;
1133 1132
@@ -1135,14 +1134,13 @@ static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
1135 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; 1134 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
1136 1135
1137 while (clusters) { 1136 while (clusters) {
1138 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, 1137 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len);
1139 &extent_flags);
1140 if (ret < 0) { 1138 if (ret < 0) {
1141 mlog_errno(ret); 1139 mlog_errno(ret);
1142 goto out; 1140 goto out;
1143 } 1141 }
1144 1142
1145 if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) { 1143 if (phys_cpos == 0) {
1146 ret = 1; 1144 ret = 1;
1147 break; 1145 break;
1148 } 1146 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d026b4f27757..3db5de4506da 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -390,7 +390,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
390 /* We may be deleting metadata blocks, so metadata alloc dinode + 390 /* We may be deleting metadata blocks, so metadata alloc dinode +
391 one desc. block for each possible delete. */ 391 one desc. block for each possible delete. */
392 if (tree_depth && next_free == 1 && 392 if (tree_depth && next_free == 1 &&
393 le32_to_cpu(last_el->l_recs[i].e_clusters) == clusters_to_del) 393 ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
394 credits += 1 + tree_depth; 394 credits += 1 + tree_depth;
395 395
396 /* update to the truncate log. */ 396 /* update to the truncate log. */
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index f0101974f4f9..71306479c68f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -156,6 +156,12 @@
156#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ 156#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */
157 157
158/* 158/*
159 * Extent record flags (e_node.leaf.flags)
160 */
161#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but
162 * unwritten */
163
164/*
159 * ioctl commands 165 * ioctl commands
160 */ 166 */
161#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) 167#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
@@ -283,10 +289,21 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
283/* 289/*
284 * On disk extent record for OCFS2 290 * On disk extent record for OCFS2
285 * It describes a range of clusters on disk. 291 * It describes a range of clusters on disk.
292 *
293 * Length fields are divided into interior and leaf node versions.
294 * This leaves room for a flags field (OCFS2_EXT_*) in the leaf nodes.
286 */ 295 */
287struct ocfs2_extent_rec { 296struct ocfs2_extent_rec {
288/*00*/ __le32 e_cpos; /* Offset into the file, in clusters */ 297/*00*/ __le32 e_cpos; /* Offset into the file, in clusters */
289 __le32 e_clusters; /* Clusters covered by this extent */ 298 union {
299 __le32 e_int_clusters; /* Clusters covered by all children */
300 struct {
301 __le16 e_leaf_clusters; /* Clusters covered by this
302 extent */
303 __u8 e_reserved1;
304 __u8 e_flags; /* Extent flags */
305 };
306 };
290 __le64 e_blkno; /* Physical disk offset, in blocks */ 307 __le64 e_blkno; /* Physical disk offset, in blocks */
291/*10*/ 308/*10*/
292}; 309};