aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tao Ma <tao.ma@oracle.com> 2009-11-30 01:32:19 -0500
committer: Joel Becker <joel.becker@oracle.com> 2009-12-02 19:14:57 -0500
commit: 38a04e432768ec0b016f3c687b4de31ac111ae59 (patch)
tree: a71a01561d6a654b8daf12c51e2e909f82950573
parent: 56f3f55cf9b604b924353ab6fcdac5fee5637ae3 (diff)
ocfs2: Find proper end cpos for a leaf refcount block.
The ocfs2 refcount tree is stored as an extent tree in which each leaf ocfs2_extent_rec points to a refcount block.

The following steps can trigger a kernel panic:

mkfs.ocfs2 -b 512 -C 1M --fs-features=refcount $DEVICE
mount -t ocfs2 $DEVICE $MNT_DIR
FILE_NAME=$RANDOM
FILE_NAME_1=$RANDOM
FILE_REF="${FILE_NAME}_ref"
FILE_REF_1="${FILE_NAME}_ref_1"
for((i=0;i<305;i++))
do
# /mnt/1048576 is a file that is 1048576 bytes in size.
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
for((i=0;i<3;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
done
for((i=0;i<2;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
for((i=0;i<11;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF
# write_f is a program which writes some bytes to a file at a given offset.
# write_f -f file_name -l offset -w write_bytes.
./write_f -f $MNT_DIR/$FILE_REF -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[306*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[311*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF_1
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
#kernel panic here.

The reason is that if the ocfs2_extent_rec is the last record in a leaf extent block, the old solution fails to find a suitable end cpos. So this patch tries to walk through the b-tree, find the next subtree root, and get the cpos that the next sub-tree starts from.

btw, I have run tristan's test case against the patched kernel for several days and this type of kernel panic never happens again.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
-rw-r--r-- fs/ocfs2/alloc.c 10
-rw-r--r-- fs/ocfs2/alloc.h 5
-rw-r--r-- fs/ocfs2/refcounttree.c 117
3 files changed, 119 insertions, 13 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38a42f5d59ff..5661db139ca0 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1765,9 +1765,9 @@ set_and_inc:
1765 * 1765 *
1766 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1767 */ 1767 */
1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 1768int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1769 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1770 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1771{ 1771{
1772 int i = 0; 1772 int i = 0;
1773 1773
@@ -2872,8 +2872,8 @@ out:
2872 * This looks similar, but is subtly different to 2872 * This looks similar, but is subtly different to
2873 * ocfs2_find_cpos_for_left_leaf(). 2873 * ocfs2_find_cpos_for_left_leaf().
2874 */ 2874 */
2875static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 2875int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2876 struct ocfs2_path *path, u32 *cpos) 2876 struct ocfs2_path *path, u32 *cpos)
2877{ 2877{
2878 int i, j, ret = 0; 2878 int i, j, ret = 0;
2879 u64 blkno; 2879 u64 blkno;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 9c122d574464..1db4359ccb90 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, 317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle, 318 handle_t *handle,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left,
324 struct ocfs2_path *right);
320#endif /* OCFS2_ALLOC_H */ 325#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3a0df7a1b810..9c3956f24e58 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -969,6 +969,103 @@ out:
969} 969}
970 970
971/* 971/*
972 * Find the end range for a leaf refcount block indicated by
973 * el->l_recs[index].e_blkno.
974 */
975static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
976 struct buffer_head *ref_root_bh,
977 struct ocfs2_extent_block *eb,
978 struct ocfs2_extent_list *el,
979 int index, u32 *cpos_end)
980{
981 int ret, i, subtree_root;
982 u32 cpos;
983 u64 blkno;
984 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
985 struct ocfs2_path *left_path = NULL, *right_path = NULL;
986 struct ocfs2_extent_tree et;
987 struct ocfs2_extent_list *tmp_el;
988
989 if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
990 /*
991 * We have a extent rec after index, so just use the e_cpos
992 * of the next extent rec.
993 */
994 *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
995 return 0;
996 }
997
998 if (!eb || (eb && !eb->h_next_leaf_blk)) {
999 /*
1000 * We are the last extent rec, so any high cpos should
1001 * be stored in this leaf refcount block.
1002 */
1003 *cpos_end = UINT_MAX;
1004 return 0;
1005 }
1006
1007 /*
1008 * If the extent block isn't the last one, we have to find
1009 * the subtree root between this extent block and the next
1010 * leaf extent block and get the corresponding e_cpos from
1011 * the subroot. Otherwise we may corrupt the b-tree.
1012 */
1013 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
1014
1015 left_path = ocfs2_new_path_from_et(&et);
1016 if (!left_path) {
1017 ret = -ENOMEM;
1018 mlog_errno(ret);
1019 goto out;
1020 }
1021
1022 cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
1023 ret = ocfs2_find_path(ci, left_path, cpos);
1024 if (ret) {
1025 mlog_errno(ret);
1026 goto out;
1027 }
1028
1029 right_path = ocfs2_new_path_from_path(left_path);
1030 if (!right_path) {
1031 ret = -ENOMEM;
1032 mlog_errno(ret);
1033 goto out;
1034 }
1035
1036 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
1037 if (ret) {
1038 mlog_errno(ret);
1039 goto out;
1040 }
1041
1042 ret = ocfs2_find_path(ci, right_path, cpos);
1043 if (ret) {
1044 mlog_errno(ret);
1045 goto out;
1046 }
1047
1048 subtree_root = ocfs2_find_subtree_root(&et, left_path,
1049 right_path);
1050
1051 tmp_el = left_path->p_node[subtree_root].el;
1052 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
1053 for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
1054 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
1055 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
1056 break;
1057 }
1058 }
1059
1060 BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
1061
1062out:
1063 ocfs2_free_path(left_path);
1064 ocfs2_free_path(right_path);
1065 return ret;
1066}
1067
1068/*
972 * Given a cpos and len, try to find the refcount record which contains cpos. 1069 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record. 1070 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos 1071 * 2. If cpos can't be found, return a fake record which start from cpos
@@ -983,10 +1080,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
983 struct buffer_head **ret_bh) 1080 struct buffer_head **ret_bh)
984{ 1081{
985 int ret = 0, i, found; 1082 int ret = 0, i, found;
986 u32 low_cpos; 1083 u32 low_cpos, uninitialized_var(cpos_end);
987 struct ocfs2_extent_list *el; 1084 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL; 1085 struct ocfs2_extent_rec *rec = NULL;
989 struct ocfs2_extent_block *eb; 1086 struct ocfs2_extent_block *eb = NULL;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 1087 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1088 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb = 1089 struct ocfs2_refcount_block *rb =
@@ -1034,12 +1131,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
1034 } 1131 }
1035 } 1132 }
1036 1133
1037 /* adjust len when we have ocfs2_extent_rec after it. */ 1134 if (found) {
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 1135 ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
1039 tmp = &el->l_recs[i+1]; 1136 eb, el, i, &cpos_end);
1137 if (ret) {
1138 mlog_errno(ret);
1139 goto out;
1140 }
1040 1141
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 1142 if (cpos_end < low_cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos; 1143 len = cpos_end - low_cpos;
1043 } 1144 }
1044 1145
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 1146 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),