diff options
author | Tao Ma <tao.ma@oracle.com> | 2009-11-30 01:32:19 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2009-12-02 19:14:57 -0500 |
commit | 38a04e432768ec0b016f3c687b4de31ac111ae59 (patch) | |
tree | a71a01561d6a654b8daf12c51e2e909f82950573 /fs/ocfs2 | |
parent | 56f3f55cf9b604b924353ab6fcdac5fee5637ae3 (diff) |
ocfs2: Find proper end cpos for a leaf refcount block.
The ocfs2 refcount tree is stored as an extent tree, while
the leaf ocfs2_refcount_rec points to a refcount block.
The following steps can trigger a kernel panic.
mkfs.ocfs2 -b 512 -C 1M --fs-features=refcount $DEVICE
mount -t ocfs2 $DEVICE $MNT_DIR
FILE_NAME=$RANDOM
FILE_NAME_1=$RANDOM
FILE_REF="${FILE_NAME}_ref"
FILE_REF_1="${FILE_NAME}_ref_1"
for((i=0;i<305;i++))
do
# /mnt/1048576 is a file that is 1048576 bytes in size.
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
for((i=0;i<3;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
done
for((i=0;i<2;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
for((i=0;i<11;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF
# write_f is a program which writes some bytes to a file at a given offset.
# write_f -f file_name -l offset -w write_bytes.
./write_f -f $MNT_DIR/$FILE_REF -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[306*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[311*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF_1
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
#kernel panic here.
The reason is that if the ocfs2_extent_rec is the last record
in a leaf extent block, the old solution fails to find a
suitable end cpos. So this patch tries to walk through the b-tree,
find the next sub root and get the cpos that the next sub-tree starts
from.
BTW, I have run Tristan's test case against the patched kernel
for several days and this type of kernel panic has never happened again.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/alloc.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/alloc.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.c | 117 |
3 files changed, 119 insertions, 13 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 38a42f5d59ff..5661db139ca0 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -1765,9 +1765,9 @@ set_and_inc: | |||
1765 | * | 1765 | * |
1766 | * The array index of the subtree root is passed back. | 1766 | * The array index of the subtree root is passed back. |
1767 | */ | 1767 | */ |
1768 | static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | 1768 | int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, |
1769 | struct ocfs2_path *left, | 1769 | struct ocfs2_path *left, |
1770 | struct ocfs2_path *right) | 1770 | struct ocfs2_path *right) |
1771 | { | 1771 | { |
1772 | int i = 0; | 1772 | int i = 0; |
1773 | 1773 | ||
@@ -2872,8 +2872,8 @@ out: | |||
2872 | * This looks similar, but is subtly different to | 2872 | * This looks similar, but is subtly different to |
2873 | * ocfs2_find_cpos_for_left_leaf(). | 2873 | * ocfs2_find_cpos_for_left_leaf(). |
2874 | */ | 2874 | */ |
2875 | static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | 2875 | int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, |
2876 | struct ocfs2_path *path, u32 *cpos) | 2876 | struct ocfs2_path *path, u32 *cpos) |
2877 | { | 2877 | { |
2878 | int i, j, ret = 0; | 2878 | int i, j, ret = 0; |
2879 | u64 blkno; | 2879 | u64 blkno; |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 9c122d574464..1db4359ccb90 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle, | |||
317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, | 317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, |
318 | handle_t *handle, | 318 | handle_t *handle, |
319 | struct ocfs2_path *path); | 319 | struct ocfs2_path *path); |
320 | int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | ||
321 | struct ocfs2_path *path, u32 *cpos); | ||
322 | int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | ||
323 | struct ocfs2_path *left, | ||
324 | struct ocfs2_path *right); | ||
320 | #endif /* OCFS2_ALLOC_H */ | 325 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3a0df7a1b810..9c3956f24e58 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -969,6 +969,103 @@ out: | |||
969 | } | 969 | } |
970 | 970 | ||
971 | /* | 971 | /* |
972 | * Find the end range for a leaf refcount block indicated by | ||
973 | * el->l_recs[index].e_blkno. | ||
974 | */ | ||
975 | static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, | ||
976 | struct buffer_head *ref_root_bh, | ||
977 | struct ocfs2_extent_block *eb, | ||
978 | struct ocfs2_extent_list *el, | ||
979 | int index, u32 *cpos_end) | ||
980 | { | ||
981 | int ret, i, subtree_root; | ||
982 | u32 cpos; | ||
983 | u64 blkno; | ||
984 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
985 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | ||
986 | struct ocfs2_extent_tree et; | ||
987 | struct ocfs2_extent_list *tmp_el; | ||
988 | |||
989 | if (index < le16_to_cpu(el->l_next_free_rec) - 1) { | ||
990 | /* | ||
991 | * We have a extent rec after index, so just use the e_cpos | ||
992 | * of the next extent rec. | ||
993 | */ | ||
994 | *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); | ||
995 | return 0; | ||
996 | } | ||
997 | |||
998 | if (!eb || (eb && !eb->h_next_leaf_blk)) { | ||
999 | /* | ||
1000 | * We are the last extent rec, so any high cpos should | ||
1001 | * be stored in this leaf refcount block. | ||
1002 | */ | ||
1003 | *cpos_end = UINT_MAX; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | /* | ||
1008 | * If the extent block isn't the last one, we have to find | ||
1009 | * the subtree root between this extent block and the next | ||
1010 | * leaf extent block and get the corresponding e_cpos from | ||
1011 | * the subroot. Otherwise we may corrupt the b-tree. | ||
1012 | */ | ||
1013 | ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); | ||
1014 | |||
1015 | left_path = ocfs2_new_path_from_et(&et); | ||
1016 | if (!left_path) { | ||
1017 | ret = -ENOMEM; | ||
1018 | mlog_errno(ret); | ||
1019 | goto out; | ||
1020 | } | ||
1021 | |||
1022 | cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos); | ||
1023 | ret = ocfs2_find_path(ci, left_path, cpos); | ||
1024 | if (ret) { | ||
1025 | mlog_errno(ret); | ||
1026 | goto out; | ||
1027 | } | ||
1028 | |||
1029 | right_path = ocfs2_new_path_from_path(left_path); | ||
1030 | if (!right_path) { | ||
1031 | ret = -ENOMEM; | ||
1032 | mlog_errno(ret); | ||
1033 | goto out; | ||
1034 | } | ||
1035 | |||
1036 | ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); | ||
1037 | if (ret) { | ||
1038 | mlog_errno(ret); | ||
1039 | goto out; | ||
1040 | } | ||
1041 | |||
1042 | ret = ocfs2_find_path(ci, right_path, cpos); | ||
1043 | if (ret) { | ||
1044 | mlog_errno(ret); | ||
1045 | goto out; | ||
1046 | } | ||
1047 | |||
1048 | subtree_root = ocfs2_find_subtree_root(&et, left_path, | ||
1049 | right_path); | ||
1050 | |||
1051 | tmp_el = left_path->p_node[subtree_root].el; | ||
1052 | blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; | ||
1053 | for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { | ||
1054 | if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { | ||
1055 | *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); | ||
1056 | break; | ||
1057 | } | ||
1058 | } | ||
1059 | |||
1060 | BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); | ||
1061 | |||
1062 | out: | ||
1063 | ocfs2_free_path(left_path); | ||
1064 | ocfs2_free_path(right_path); | ||
1065 | return ret; | ||
1066 | } | ||
1067 | |||
1068 | /* | ||
972 | * Given a cpos and len, try to find the refcount record which contains cpos. | 1069 | * Given a cpos and len, try to find the refcount record which contains cpos. |
973 | * 1. If cpos can be found in one refcount record, return the record. | 1070 | * 1. If cpos can be found in one refcount record, return the record. |
974 | * 2. If cpos can't be found, return a fake record which start from cpos | 1071 | * 2. If cpos can't be found, return a fake record which start from cpos |
@@ -983,10 +1080,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, | |||
983 | struct buffer_head **ret_bh) | 1080 | struct buffer_head **ret_bh) |
984 | { | 1081 | { |
985 | int ret = 0, i, found; | 1082 | int ret = 0, i, found; |
986 | u32 low_cpos; | 1083 | u32 low_cpos, uninitialized_var(cpos_end); |
987 | struct ocfs2_extent_list *el; | 1084 | struct ocfs2_extent_list *el; |
988 | struct ocfs2_extent_rec *tmp, *rec = NULL; | 1085 | struct ocfs2_extent_rec *rec = NULL; |
989 | struct ocfs2_extent_block *eb; | 1086 | struct ocfs2_extent_block *eb = NULL; |
990 | struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; | 1087 | struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; |
991 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 1088 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
992 | struct ocfs2_refcount_block *rb = | 1089 | struct ocfs2_refcount_block *rb = |
@@ -1034,12 +1131,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, | |||
1034 | } | 1131 | } |
1035 | } | 1132 | } |
1036 | 1133 | ||
1037 | /* adjust len when we have ocfs2_extent_rec after it. */ | 1134 | if (found) { |
1038 | if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { | 1135 | ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh, |
1039 | tmp = &el->l_recs[i+1]; | 1136 | eb, el, i, &cpos_end); |
1137 | if (ret) { | ||
1138 | mlog_errno(ret); | ||
1139 | goto out; | ||
1140 | } | ||
1040 | 1141 | ||
1041 | if (le32_to_cpu(tmp->e_cpos) < cpos + len) | 1142 | if (cpos_end < low_cpos + len) |
1042 | len = le32_to_cpu(tmp->e_cpos) - cpos; | 1143 | len = cpos_end - low_cpos; |
1043 | } | 1144 | } |
1044 | 1145 | ||
1045 | ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), | 1146 | ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), |