diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-01-17 15:53:31 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-04-26 18:01:56 -0400 |
commit | 3a0782d09c07aa3ec767ba6089cd15cfbfbfc508 (patch) | |
tree | 4791919970e11f4b2fb3162481a59a56f5196fe4 /fs/ocfs2 | |
parent | 363041a5f74b953ab6b705ac9c88e5eda218a24b (diff) |
ocfs2: teach extend/truncate about sparse files
For ocfs2_truncate_file(), we eliminate the "simple" truncate case which no
longer exists since i_size is not tied to i_clusters. In
ocfs2_extend_file(), we skip the allocation / page zeroing code for file
systems which understand sparse files.
The core truncate code is changed to do a bottom up tree traversal. This
gets abstracted out into its own function. To make things more readable,
most of the special case handling for in-inode extents from
ocfs2_do_truncate() is also removed.
Though write support for sparse files comes in a later patch, we at least
update ocfs2_prepare_inode_for_write() to skip allocation for sparse files.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/alloc.c | 480 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 31 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 46 |
3 files changed, 320 insertions, 237 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 85a05f120249..9a40603c4d4b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -2921,12 +2921,13 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) | |||
2921 | * block will be so we can update his h_next_leaf_blk field, as well | 2921 | * block will be so we can update his h_next_leaf_blk field, as well |
2922 | * as the dinodes i_last_eb_blk */ | 2922 | * as the dinodes i_last_eb_blk */ |
2923 | static int ocfs2_find_new_last_ext_blk(struct inode *inode, | 2923 | static int ocfs2_find_new_last_ext_blk(struct inode *inode, |
2924 | u32 new_i_clusters, | 2924 | unsigned int clusters_to_del, |
2925 | struct ocfs2_path *path, | 2925 | struct ocfs2_path *path, |
2926 | struct buffer_head **new_last_eb) | 2926 | struct buffer_head **new_last_eb) |
2927 | { | 2927 | { |
2928 | int ret = 0; | 2928 | int next_free, ret = 0; |
2929 | u32 cpos; | 2929 | u32 cpos; |
2930 | struct ocfs2_extent_rec *rec; | ||
2930 | struct ocfs2_extent_block *eb; | 2931 | struct ocfs2_extent_block *eb; |
2931 | struct ocfs2_extent_list *el; | 2932 | struct ocfs2_extent_list *el; |
2932 | struct buffer_head *bh = NULL; | 2933 | struct buffer_head *bh = NULL; |
@@ -2939,20 +2940,48 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, | |||
2939 | 2940 | ||
2940 | /* trunc to zero special case - this makes tree_depth = 0 | 2941 | /* trunc to zero special case - this makes tree_depth = 0 |
2941 | * regardless of what it is. */ | 2942 | * regardless of what it is. */ |
2942 | if (!new_i_clusters) | 2943 | if (OCFS2_I(inode)->ip_clusters == clusters_to_del) |
2943 | goto out; | 2944 | goto out; |
2944 | 2945 | ||
2945 | el = path_leaf_el(path); | 2946 | el = path_leaf_el(path); |
2946 | BUG_ON(!el->l_next_free_rec); | 2947 | BUG_ON(!el->l_next_free_rec); |
2947 | 2948 | ||
2948 | /* Make sure that this guy will actually be empty after we | 2949 | /* |
2949 | * clear away the data. */ | 2950 | * Make sure that this extent list will actually be empty |
2951 | * after we clear away the data. We can shortcut out if | ||
2952 | * there's more than one non-empty extent in the | ||
2953 | * list. Otherwise, a check of the remaining extent is | ||
2954 | * necessary. | ||
2955 | */ | ||
2956 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
2957 | rec = NULL; | ||
2950 | if (ocfs2_is_empty_extent(&el->l_recs[0])) { | 2958 | if (ocfs2_is_empty_extent(&el->l_recs[0])) { |
2951 | if (le16_to_cpu(el->l_next_free_rec) > 1 && | 2959 | if (next_free > 2) |
2952 | le32_to_cpu(el->l_recs[1].e_cpos) < new_i_clusters) | ||
2953 | goto out; | 2960 | goto out; |
2954 | } else if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters) | 2961 | |
2955 | goto out; | 2962 | /* We may have a valid extent in index 1, check it. */ |
2963 | if (next_free == 2) | ||
2964 | rec = &el->l_recs[1]; | ||
2965 | |||
2966 | /* | ||
2967 | * Fall through - no more nonempty extents, so we want | ||
2968 | * to delete this leaf. | ||
2969 | */ | ||
2970 | } else { | ||
2971 | if (next_free > 1) | ||
2972 | goto out; | ||
2973 | |||
2974 | rec = &el->l_recs[0]; | ||
2975 | } | ||
2976 | |||
2977 | if (rec) { | ||
2978 | /* | ||
2979 | * Check if we'll only be trimming off the end of this | ||
2980 | * cluster. | ||
2981 | */ | ||
2982 | if (le16_to_cpu(rec->e_clusters) > clusters_to_del) | ||
2983 | goto out; | ||
2984 | } | ||
2956 | 2985 | ||
2957 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); | 2986 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); |
2958 | if (ret) { | 2987 | if (ret) { |
@@ -2984,6 +3013,223 @@ out: | |||
2984 | return ret; | 3013 | return ret; |
2985 | } | 3014 | } |
2986 | 3015 | ||
3016 | /* | ||
3017 | * Trim some clusters off the rightmost edge of a tree. Only called | ||
3018 | * during truncate. | ||
3019 | * | ||
3020 | * The caller needs to: | ||
3021 | * - start journaling of each path component. | ||
3022 | * - compute and fully set up any new last ext block | ||
3023 | */ | ||
3024 | static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, | ||
3025 | handle_t *handle, struct ocfs2_truncate_context *tc, | ||
3026 | u32 clusters_to_del, u64 *delete_start) | ||
3027 | { | ||
3028 | int ret, i, index = path->p_tree_depth; | ||
3029 | u32 new_edge = 0; | ||
3030 | u64 deleted_eb = 0; | ||
3031 | struct buffer_head *bh; | ||
3032 | struct ocfs2_extent_list *el; | ||
3033 | struct ocfs2_extent_rec *rec; | ||
3034 | |||
3035 | *delete_start = 0; | ||
3036 | |||
3037 | while (index >= 0) { | ||
3038 | bh = path->p_node[index].bh; | ||
3039 | el = path->p_node[index].el; | ||
3040 | |||
3041 | mlog(0, "traveling tree (index = %d, block = %llu)\n", | ||
3042 | index, (unsigned long long)bh->b_blocknr); | ||
3043 | |||
3044 | BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); | ||
3045 | |||
3046 | if (index != | ||
3047 | (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { | ||
3048 | ocfs2_error(inode->i_sb, | ||
3049 | "Inode %lu has invalid ext. block %llu", | ||
3050 | inode->i_ino, | ||
3051 | (unsigned long long)bh->b_blocknr); | ||
3052 | ret = -EROFS; | ||
3053 | goto out; | ||
3054 | } | ||
3055 | |||
3056 | find_tail_record: | ||
3057 | i = le16_to_cpu(el->l_next_free_rec) - 1; | ||
3058 | rec = &el->l_recs[i]; | ||
3059 | |||
3060 | mlog(0, "Extent list before: record %d: (%u, %u, %llu), " | ||
3061 | "next = %u\n", i, le32_to_cpu(rec->e_cpos), | ||
3062 | le32_to_cpu(rec->e_clusters), | ||
3063 | (unsigned long long)le64_to_cpu(rec->e_blkno), | ||
3064 | le16_to_cpu(el->l_next_free_rec)); | ||
3065 | |||
3066 | BUG_ON(le32_to_cpu(rec->e_clusters) < clusters_to_del); | ||
3067 | |||
3068 | if (le16_to_cpu(el->l_tree_depth) == 0) { | ||
3069 | /* | ||
3070 | * If the leaf block contains a single empty | ||
3071 | * extent and no records, we can just remove | ||
3072 | * the block. | ||
3073 | */ | ||
3074 | if (i == 0 && ocfs2_is_empty_extent(rec)) { | ||
3075 | memset(rec, 0, | ||
3076 | sizeof(struct ocfs2_extent_rec)); | ||
3077 | el->l_next_free_rec = cpu_to_le16(0); | ||
3078 | |||
3079 | goto delete; | ||
3080 | } | ||
3081 | |||
3082 | /* | ||
3083 | * Remove any empty extents by shifting things | ||
3084 | * left. That should make life much easier on | ||
3085 | * the code below. This condition is rare | ||
3086 | * enough that we shouldn't see a performance | ||
3087 | * hit. | ||
3088 | */ | ||
3089 | if (ocfs2_is_empty_extent(&el->l_recs[0])) { | ||
3090 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3091 | |||
3092 | for(i = 0; | ||
3093 | i < le16_to_cpu(el->l_next_free_rec); i++) | ||
3094 | el->l_recs[i] = el->l_recs[i + 1]; | ||
3095 | |||
3096 | memset(&el->l_recs[i], 0, | ||
3097 | sizeof(struct ocfs2_extent_rec)); | ||
3098 | |||
3099 | /* | ||
3100 | * We've modified our extent list. The | ||
3101 | * simplest way to handle this change | ||
3102 | * is to begin the search from the | ||
3103 | * start again. | ||
3104 | */ | ||
3105 | goto find_tail_record; | ||
3106 | } | ||
3107 | |||
3108 | le32_add_cpu(&rec->e_clusters, -clusters_to_del); | ||
3109 | |||
3110 | /* | ||
3111 | * We'll use "new_edge" on our way back up the | ||
3112 | * tree to know what our rightmost cpos is. | ||
3113 | */ | ||
3114 | new_edge = le32_to_cpu(rec->e_clusters); | ||
3115 | new_edge += le32_to_cpu(rec->e_cpos); | ||
3116 | |||
3117 | /* | ||
3118 | * The caller will use this to delete data blocks. | ||
3119 | */ | ||
3120 | *delete_start = le64_to_cpu(rec->e_blkno) | ||
3121 | + ocfs2_clusters_to_blocks(inode->i_sb, | ||
3122 | le32_to_cpu(rec->e_clusters)); | ||
3123 | |||
3124 | /* | ||
3125 | * If it's now empty, remove this record. | ||
3126 | */ | ||
3127 | if (le32_to_cpu(rec->e_clusters) == 0) { | ||
3128 | memset(rec, 0, | ||
3129 | sizeof(struct ocfs2_extent_rec)); | ||
3130 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3131 | } | ||
3132 | } else { | ||
3133 | if (le64_to_cpu(rec->e_blkno) == deleted_eb) { | ||
3134 | memset(rec, 0, | ||
3135 | sizeof(struct ocfs2_extent_rec)); | ||
3136 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3137 | |||
3138 | goto delete; | ||
3139 | } | ||
3140 | |||
3141 | /* Can this actually happen? */ | ||
3142 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
3143 | goto delete; | ||
3144 | |||
3145 | /* | ||
3146 | * We never actually deleted any clusters | ||
3147 | * because our leaf was empty. There's no | ||
3148 | * reason to adjust the rightmost edge then. | ||
3149 | */ | ||
3150 | if (new_edge == 0) | ||
3151 | goto delete; | ||
3152 | |||
3153 | rec->e_clusters = cpu_to_le32(new_edge); | ||
3154 | le32_add_cpu(&rec->e_clusters, | ||
3155 | -le32_to_cpu(rec->e_cpos)); | ||
3156 | |||
3157 | /* | ||
3158 | * A deleted child record should have been | ||
3159 | * caught above. | ||
3160 | */ | ||
3161 | BUG_ON(le32_to_cpu(rec->e_clusters) == 0); | ||
3162 | } | ||
3163 | |||
3164 | delete: | ||
3165 | ret = ocfs2_journal_dirty(handle, bh); | ||
3166 | if (ret) { | ||
3167 | mlog_errno(ret); | ||
3168 | goto out; | ||
3169 | } | ||
3170 | |||
3171 | mlog(0, "extent list container %llu, after: record %d: " | ||
3172 | "(%u, %u, %llu), next = %u.\n", | ||
3173 | (unsigned long long)bh->b_blocknr, i, | ||
3174 | le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), | ||
3175 | (unsigned long long)le64_to_cpu(rec->e_blkno), | ||
3176 | le16_to_cpu(el->l_next_free_rec)); | ||
3177 | |||
3178 | /* | ||
3179 | * We must be careful to only attempt delete of an | ||
3180 | * extent block (and not the root inode block). | ||
3181 | */ | ||
3182 | if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { | ||
3183 | struct ocfs2_extent_block *eb = | ||
3184 | (struct ocfs2_extent_block *)bh->b_data; | ||
3185 | |||
3186 | /* | ||
3187 | * Save this for use when processing the | ||
3188 | * parent block. | ||
3189 | */ | ||
3190 | deleted_eb = le64_to_cpu(eb->h_blkno); | ||
3191 | |||
3192 | mlog(0, "deleting this extent block.\n"); | ||
3193 | |||
3194 | ocfs2_remove_from_cache(inode, bh); | ||
3195 | |||
3196 | BUG_ON(le32_to_cpu(el->l_recs[0].e_clusters)); | ||
3197 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); | ||
3198 | BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); | ||
3199 | |||
3200 | if (le16_to_cpu(eb->h_suballoc_slot) == 0) { | ||
3201 | /* | ||
3202 | * This code only understands how to | ||
3203 | * lock the suballocator in slot 0, | ||
3204 | * which is fine because allocation is | ||
3205 | * only ever done out of that | ||
3206 | * suballocator too. A future version | ||
3207 | * might change that however, so avoid | ||
3208 | * a free if we don't know how to | ||
3209 | * handle it. This way an fs incompat | ||
3210 | * bit will not be necessary. | ||
3211 | */ | ||
3212 | ret = ocfs2_free_extent_block(handle, | ||
3213 | tc->tc_ext_alloc_inode, | ||
3214 | tc->tc_ext_alloc_bh, | ||
3215 | eb); | ||
3216 | |||
3217 | /* An error here is not fatal. */ | ||
3218 | if (ret < 0) | ||
3219 | mlog_errno(ret); | ||
3220 | } | ||
3221 | } else { | ||
3222 | deleted_eb = 0; | ||
3223 | } | ||
3224 | |||
3225 | index--; | ||
3226 | } | ||
3227 | |||
3228 | ret = 0; | ||
3229 | out: | ||
3230 | return ret; | ||
3231 | } | ||
3232 | |||
2987 | static int ocfs2_do_truncate(struct ocfs2_super *osb, | 3233 | static int ocfs2_do_truncate(struct ocfs2_super *osb, |
2988 | unsigned int clusters_to_del, | 3234 | unsigned int clusters_to_del, |
2989 | struct inode *inode, | 3235 | struct inode *inode, |
@@ -2992,20 +3238,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
2992 | struct ocfs2_truncate_context *tc, | 3238 | struct ocfs2_truncate_context *tc, |
2993 | struct ocfs2_path *path) | 3239 | struct ocfs2_path *path) |
2994 | { | 3240 | { |
2995 | int status, i, index; | 3241 | int status; |
2996 | struct ocfs2_dinode *fe; | 3242 | struct ocfs2_dinode *fe; |
2997 | struct ocfs2_extent_block *eb; | ||
2998 | struct ocfs2_extent_block *last_eb = NULL; | 3243 | struct ocfs2_extent_block *last_eb = NULL; |
2999 | struct ocfs2_extent_list *el; | 3244 | struct ocfs2_extent_list *el; |
3000 | struct buffer_head *eb_bh = NULL; | ||
3001 | struct buffer_head *last_eb_bh = NULL; | 3245 | struct buffer_head *last_eb_bh = NULL; |
3002 | u64 delete_blk = 0; | 3246 | u64 delete_blk = 0; |
3003 | 3247 | ||
3004 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 3248 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
3005 | 3249 | ||
3006 | status = ocfs2_find_new_last_ext_blk(inode, | 3250 | status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, |
3007 | le32_to_cpu(fe->i_clusters) - | ||
3008 | clusters_to_del, | ||
3009 | path, &last_eb_bh); | 3251 | path, &last_eb_bh); |
3010 | if (status < 0) { | 3252 | if (status < 0) { |
3011 | mlog_errno(status); | 3253 | mlog_errno(status); |
@@ -3016,14 +3258,10 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
3016 | * Each component will be touched, so we might as well journal | 3258 | * Each component will be touched, so we might as well journal |
3017 | * here to avoid having to handle errors later. | 3259 | * here to avoid having to handle errors later. |
3018 | */ | 3260 | */ |
3019 | for (i = 0; i < path_num_items(path); i++) { | 3261 | status = ocfs2_journal_access_path(inode, handle, path); |
3020 | status = ocfs2_journal_access(handle, inode, | 3262 | if (status < 0) { |
3021 | path->p_node[i].bh, | 3263 | mlog_errno(status); |
3022 | OCFS2_JOURNAL_ACCESS_WRITE); | 3264 | goto bail; |
3023 | if (status < 0) { | ||
3024 | mlog_errno(status); | ||
3025 | goto bail; | ||
3026 | } | ||
3027 | } | 3265 | } |
3028 | 3266 | ||
3029 | if (last_eb_bh) { | 3267 | if (last_eb_bh) { |
@@ -3047,6 +3285,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
3047 | ocfs2_error(inode->i_sb, | 3285 | ocfs2_error(inode->i_sb, |
3048 | "Inode %lu has an empty extent record, depth %u\n", | 3286 | "Inode %lu has an empty extent record, depth %u\n", |
3049 | inode->i_ino, le16_to_cpu(el->l_tree_depth)); | 3287 | inode->i_ino, le16_to_cpu(el->l_tree_depth)); |
3288 | status = -EROFS; | ||
3050 | goto bail; | 3289 | goto bail; |
3051 | } | 3290 | } |
3052 | 3291 | ||
@@ -3056,38 +3295,11 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
3056 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 3295 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
3057 | le32_add_cpu(&fe->i_clusters, -clusters_to_del); | 3296 | le32_add_cpu(&fe->i_clusters, -clusters_to_del); |
3058 | 3297 | ||
3059 | i = le16_to_cpu(el->l_next_free_rec) - 1; | 3298 | status = ocfs2_trim_tree(inode, path, handle, tc, |
3060 | 3299 | clusters_to_del, &delete_blk); | |
3061 | BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); | 3300 | if (status) { |
3062 | le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); | 3301 | mlog_errno(status); |
3063 | /* tree depth zero, we can just delete the clusters, otherwise | 3302 | goto bail; |
3064 | * we need to record the offset of the next level extent block | ||
3065 | * as we may overwrite it. */ | ||
3066 | if (!el->l_tree_depth) { | ||
3067 | delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) | ||
3068 | + ocfs2_clusters_to_blocks(osb->sb, | ||
3069 | le32_to_cpu(el->l_recs[i].e_clusters)); | ||
3070 | |||
3071 | if (!el->l_recs[i].e_clusters) { | ||
3072 | /* if we deleted the whole extent record, then clear | ||
3073 | * out the other fields and update the extent | ||
3074 | * list. | ||
3075 | */ | ||
3076 | el->l_recs[i].e_cpos = 0; | ||
3077 | el->l_recs[i].e_blkno = 0; | ||
3078 | BUG_ON(!el->l_next_free_rec); | ||
3079 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3080 | |||
3081 | /* | ||
3082 | * The leftmost record might be an empty extent - | ||
3083 | * delete it here too. | ||
3084 | */ | ||
3085 | if (i == 1 && ocfs2_is_empty_extent(&el->l_recs[0])) { | ||
3086 | el->l_recs[0].e_cpos = 0; | ||
3087 | el->l_recs[0].e_blkno = 0; | ||
3088 | el->l_next_free_rec = 0; | ||
3089 | } | ||
3090 | } | ||
3091 | } | 3303 | } |
3092 | 3304 | ||
3093 | if (le32_to_cpu(fe->i_clusters) == 0) { | 3305 | if (le32_to_cpu(fe->i_clusters) == 0) { |
@@ -3115,125 +3327,13 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
3115 | } | 3327 | } |
3116 | } | 3328 | } |
3117 | 3329 | ||
3118 | index = 1; | 3330 | if (delete_blk) { |
3119 | /* if our tree depth > 0, update all the tree blocks below us. */ | 3331 | status = ocfs2_truncate_log_append(osb, handle, delete_blk, |
3120 | while (index <= path->p_tree_depth) { | 3332 | clusters_to_del); |
3121 | eb_bh = path->p_node[index].bh; | ||
3122 | eb = (struct ocfs2_extent_block *)eb_bh->b_data; | ||
3123 | el = path->p_node[index].el; | ||
3124 | |||
3125 | mlog(0, "traveling tree (index = %d, extent block: %llu)\n", | ||
3126 | index, (unsigned long long)eb_bh->b_blocknr); | ||
3127 | |||
3128 | BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); | ||
3129 | if (index != | ||
3130 | (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { | ||
3131 | ocfs2_error(inode->i_sb, | ||
3132 | "Inode %lu has invalid ext. block %llu\n", | ||
3133 | inode->i_ino, | ||
3134 | (unsigned long long)eb_bh->b_blocknr); | ||
3135 | goto bail; | ||
3136 | } | ||
3137 | |||
3138 | i = le16_to_cpu(el->l_next_free_rec) - 1; | ||
3139 | |||
3140 | mlog(0, "extent block %llu, before: record %d: " | ||
3141 | "(%u, %u, %llu), next = %u\n", | ||
3142 | (unsigned long long)le64_to_cpu(eb->h_blkno), i, | ||
3143 | le32_to_cpu(el->l_recs[i].e_cpos), | ||
3144 | le32_to_cpu(el->l_recs[i].e_clusters), | ||
3145 | (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), | ||
3146 | le16_to_cpu(el->l_next_free_rec)); | ||
3147 | |||
3148 | BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); | ||
3149 | le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); | ||
3150 | |||
3151 | /* bottom-most block requires us to delete data.*/ | ||
3152 | if (!el->l_tree_depth) | ||
3153 | delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) | ||
3154 | + ocfs2_clusters_to_blocks(osb->sb, | ||
3155 | le32_to_cpu(el->l_recs[i].e_clusters)); | ||
3156 | if (!el->l_recs[i].e_clusters) { | ||
3157 | el->l_recs[i].e_cpos = 0; | ||
3158 | el->l_recs[i].e_blkno = 0; | ||
3159 | BUG_ON(!el->l_next_free_rec); | ||
3160 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3161 | } | ||
3162 | if (i == 1 && ocfs2_is_empty_extent(&el->l_recs[0])) { | ||
3163 | el->l_recs[0].e_cpos = 0; | ||
3164 | el->l_recs[0].e_blkno = 0; | ||
3165 | el->l_next_free_rec = 0; | ||
3166 | } | ||
3167 | |||
3168 | mlog(0, "extent block %llu, after: record %d: " | ||
3169 | "(%u, %u, %llu), next = %u\n", | ||
3170 | (unsigned long long)le64_to_cpu(eb->h_blkno), i, | ||
3171 | le32_to_cpu(el->l_recs[i].e_cpos), | ||
3172 | le32_to_cpu(el->l_recs[i].e_clusters), | ||
3173 | (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), | ||
3174 | le16_to_cpu(el->l_next_free_rec)); | ||
3175 | |||
3176 | status = ocfs2_journal_dirty(handle, eb_bh); | ||
3177 | if (status < 0) { | 3333 | if (status < 0) { |
3178 | mlog_errno(status); | 3334 | mlog_errno(status); |
3179 | goto bail; | 3335 | goto bail; |
3180 | } | 3336 | } |
3181 | |||
3182 | if (!el->l_next_free_rec) { | ||
3183 | mlog(0, "deleting this extent block.\n"); | ||
3184 | |||
3185 | ocfs2_remove_from_cache(inode, eb_bh); | ||
3186 | |||
3187 | BUG_ON(el->l_recs[0].e_clusters); | ||
3188 | BUG_ON(el->l_recs[0].e_cpos); | ||
3189 | BUG_ON(el->l_recs[0].e_blkno); | ||
3190 | |||
3191 | /* | ||
3192 | * We need to remove this extent block from | ||
3193 | * the list above it. | ||
3194 | * | ||
3195 | * Since we've passed it already in this loop, | ||
3196 | * no need to worry about journaling. | ||
3197 | */ | ||
3198 | el = path->p_node[index - 1].el; | ||
3199 | i = le16_to_cpu(el->l_next_free_rec) - 1; | ||
3200 | BUG_ON(i < 0); | ||
3201 | el->l_recs[i].e_cpos = 0; | ||
3202 | el->l_recs[i].e_clusters = 0; | ||
3203 | el->l_recs[i].e_blkno = 0; | ||
3204 | le16_add_cpu(&el->l_next_free_rec, -1); | ||
3205 | |||
3206 | if (eb->h_suballoc_slot == 0) { | ||
3207 | /* | ||
3208 | * This code only understands how to | ||
3209 | * lock the suballocator in slot 0, | ||
3210 | * which is fine because allocation is | ||
3211 | * only ever done out of that | ||
3212 | * suballocator too. A future version | ||
3213 | * might change that however, so avoid | ||
3214 | * a free if we don't know how to | ||
3215 | * handle it. This way an fs incompat | ||
3216 | * bit will not be necessary. | ||
3217 | */ | ||
3218 | status = ocfs2_free_extent_block(handle, | ||
3219 | tc->tc_ext_alloc_inode, | ||
3220 | tc->tc_ext_alloc_bh, | ||
3221 | eb); | ||
3222 | if (status < 0) { | ||
3223 | mlog_errno(status); | ||
3224 | goto bail; | ||
3225 | } | ||
3226 | } | ||
3227 | } | ||
3228 | index++; | ||
3229 | } | ||
3230 | |||
3231 | BUG_ON(!delete_blk); | ||
3232 | status = ocfs2_truncate_log_append(osb, handle, delete_blk, | ||
3233 | clusters_to_del); | ||
3234 | if (status < 0) { | ||
3235 | mlog_errno(status); | ||
3236 | goto bail; | ||
3237 | } | 3337 | } |
3238 | status = 0; | 3338 | status = 0; |
3239 | bail: | 3339 | bail: |
@@ -3275,6 +3375,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
3275 | } | 3375 | } |
3276 | start: | 3376 | start: |
3277 | /* | 3377 | /* |
3378 | * Check that we still have allocation to delete. | ||
3379 | */ | ||
3380 | if (OCFS2_I(inode)->ip_clusters == 0) { | ||
3381 | status = 0; | ||
3382 | goto bail; | ||
3383 | } | ||
3384 | |||
3385 | /* | ||
3278 | * Truncate always works against the rightmost tree branch. | 3386 | * Truncate always works against the rightmost tree branch. |
3279 | */ | 3387 | */ |
3280 | status = ocfs2_find_path(inode, path, UINT_MAX); | 3388 | status = ocfs2_find_path(inode, path, UINT_MAX); |
@@ -3298,6 +3406,15 @@ start: | |||
3298 | * - no record needs to be removed (truncate has completed) | 3406 | * - no record needs to be removed (truncate has completed) |
3299 | */ | 3407 | */ |
3300 | el = path_leaf_el(path); | 3408 | el = path_leaf_el(path); |
3409 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | ||
3410 | ocfs2_error(inode->i_sb, | ||
3411 | "Inode %llu has empty extent block at %llu\n", | ||
3412 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
3413 | (unsigned long long)path_leaf_bh(path)->b_blocknr); | ||
3414 | status = -EROFS; | ||
3415 | goto bail; | ||
3416 | } | ||
3417 | |||
3301 | i = le16_to_cpu(el->l_next_free_rec) - 1; | 3418 | i = le16_to_cpu(el->l_next_free_rec) - 1; |
3302 | range = le32_to_cpu(el->l_recs[i].e_cpos) + | 3419 | range = le32_to_cpu(el->l_recs[i].e_cpos) + |
3303 | le32_to_cpu(el->l_recs[i].e_clusters); | 3420 | le32_to_cpu(el->l_recs[i].e_clusters); |
@@ -3359,10 +3476,11 @@ start: | |||
3359 | ocfs2_reinit_path(path, 1); | 3476 | ocfs2_reinit_path(path, 1); |
3360 | 3477 | ||
3361 | /* | 3478 | /* |
3362 | * Only loop if we still have allocation. | 3479 | * The check above will catch the case where we've truncated |
3480 | * away all allocation. | ||
3363 | */ | 3481 | */ |
3364 | if (OCFS2_I(inode)->ip_clusters) | 3482 | goto start; |
3365 | goto start; | 3483 | |
3366 | bail: | 3484 | bail: |
3367 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 3485 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
3368 | 3486 | ||
@@ -3414,22 +3532,6 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
3414 | "%llu\n", fe->i_clusters, new_i_clusters, | 3532 | "%llu\n", fe->i_clusters, new_i_clusters, |
3415 | (unsigned long long)fe->i_size); | 3533 | (unsigned long long)fe->i_size); |
3416 | 3534 | ||
3417 | if (!ocfs2_sparse_alloc(osb) && | ||
3418 | le32_to_cpu(fe->i_clusters) <= new_i_clusters) { | ||
3419 | ocfs2_error(inode->i_sb, "Dinode %llu has cluster count " | ||
3420 | "%u and size %llu whereas struct inode has " | ||
3421 | "cluster count %u and size %llu which caused an " | ||
3422 | "invalid truncate to %u clusters.", | ||
3423 | (unsigned long long)le64_to_cpu(fe->i_blkno), | ||
3424 | le32_to_cpu(fe->i_clusters), | ||
3425 | (unsigned long long)le64_to_cpu(fe->i_size), | ||
3426 | OCFS2_I(inode)->ip_clusters, i_size_read(inode), | ||
3427 | new_i_clusters); | ||
3428 | mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres); | ||
3429 | status = -EIO; | ||
3430 | goto bail; | ||
3431 | } | ||
3432 | |||
3433 | *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); | 3535 | *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); |
3434 | if (!(*tc)) { | 3536 | if (!(*tc)) { |
3435 | status = -ENOMEM; | 3537 | status = -ENOMEM; |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8c97fa1c45f6..edc0b617f409 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -344,18 +344,6 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
344 | } | 344 | } |
345 | ocfs2_data_unlock(inode, 1); | 345 | ocfs2_data_unlock(inode, 1); |
346 | 346 | ||
347 | if (le32_to_cpu(fe->i_clusters) == | ||
348 | ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { | ||
349 | mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", | ||
350 | fe->i_clusters); | ||
351 | /* No allocation change is required, so lets fast path | ||
352 | * this truncate. */ | ||
353 | status = ocfs2_simple_size_update(inode, di_bh, new_i_size); | ||
354 | if (status < 0) | ||
355 | mlog_errno(status); | ||
356 | goto bail; | ||
357 | } | ||
358 | |||
359 | /* alright, we're going to need to do a full blown alloc size | 347 | /* alright, we're going to need to do a full blown alloc size |
360 | * change. Orphan the inode so that recovery can complete the | 348 | * change. Orphan the inode so that recovery can complete the |
361 | * truncate if necessary. This does the task of marking | 349 | * truncate if necessary. This does the task of marking |
@@ -785,7 +773,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
785 | size_t tail_to_skip) | 773 | size_t tail_to_skip) |
786 | { | 774 | { |
787 | int ret = 0; | 775 | int ret = 0; |
788 | u32 clusters_to_add; | 776 | u32 clusters_to_add = 0; |
789 | 777 | ||
790 | BUG_ON(!tail_to_skip && !di_bh); | 778 | BUG_ON(!tail_to_skip && !di_bh); |
791 | 779 | ||
@@ -797,6 +785,11 @@ static int ocfs2_extend_file(struct inode *inode, | |||
797 | goto out; | 785 | goto out; |
798 | BUG_ON(new_i_size < i_size_read(inode)); | 786 | BUG_ON(new_i_size < i_size_read(inode)); |
799 | 787 | ||
788 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
789 | BUG_ON(tail_to_skip != 0); | ||
790 | goto out_update_size; | ||
791 | } | ||
792 | |||
800 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - | 793 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - |
801 | OCFS2_I(inode)->ip_clusters; | 794 | OCFS2_I(inode)->ip_clusters; |
802 | 795 | ||
@@ -832,6 +825,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
832 | goto out_unlock; | 825 | goto out_unlock; |
833 | } | 826 | } |
834 | 827 | ||
828 | out_update_size: | ||
835 | if (!tail_to_skip) { | 829 | if (!tail_to_skip) { |
836 | /* We're being called from ocfs2_setattr() which wants | 830 | /* We're being called from ocfs2_setattr() which wants |
837 | * us to update i_size */ | 831 | * us to update i_size */ |
@@ -841,7 +835,8 @@ static int ocfs2_extend_file(struct inode *inode, | |||
841 | } | 835 | } |
842 | 836 | ||
843 | out_unlock: | 837 | out_unlock: |
844 | ocfs2_data_unlock(inode, 1); | 838 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
839 | ocfs2_data_unlock(inode, 1); | ||
845 | 840 | ||
846 | out: | 841 | out: |
847 | return ret; | 842 | return ret; |
@@ -1097,6 +1092,14 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1097 | } else { | 1092 | } else { |
1098 | saved_pos = *ppos; | 1093 | saved_pos = *ppos; |
1099 | } | 1094 | } |
1095 | |||
1096 | /* | ||
1097 | * The rest of this loop is concerned with legacy file | ||
1098 | * systems which don't support sparse files. | ||
1099 | */ | ||
1100 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
1101 | break; | ||
1102 | |||
1100 | newsize = count + saved_pos; | 1103 | newsize = count + saved_pos; |
1101 | 1104 | ||
1102 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", | 1105 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 5ff8549eb1a3..0bd86a137591 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -487,7 +487,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
487 | struct buffer_head *fe_bh) | 487 | struct buffer_head *fe_bh) |
488 | { | 488 | { |
489 | int status = 0; | 489 | int status = 0; |
490 | handle_t *handle = NULL; | ||
491 | struct ocfs2_truncate_context *tc = NULL; | 490 | struct ocfs2_truncate_context *tc = NULL; |
492 | struct ocfs2_dinode *fe; | 491 | struct ocfs2_dinode *fe; |
493 | 492 | ||
@@ -495,41 +494,20 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
495 | 494 | ||
496 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 495 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
497 | 496 | ||
498 | /* zero allocation, zero truncate :) */ | 497 | if (fe->i_clusters) { |
499 | if (!fe->i_clusters) | 498 | status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); |
500 | goto bail; | 499 | if (status < 0) { |
501 | 500 | mlog_errno(status); | |
502 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 501 | goto out; |
503 | if (IS_ERR(handle)) { | 502 | } |
504 | status = PTR_ERR(handle); | ||
505 | handle = NULL; | ||
506 | mlog_errno(status); | ||
507 | goto bail; | ||
508 | } | ||
509 | |||
510 | status = ocfs2_set_inode_size(handle, inode, fe_bh, 0ULL); | ||
511 | if (status < 0) { | ||
512 | mlog_errno(status); | ||
513 | goto bail; | ||
514 | } | ||
515 | |||
516 | ocfs2_commit_trans(osb, handle); | ||
517 | handle = NULL; | ||
518 | |||
519 | status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); | ||
520 | if (status < 0) { | ||
521 | mlog_errno(status); | ||
522 | goto bail; | ||
523 | } | ||
524 | 503 | ||
525 | status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); | 504 | status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); |
526 | if (status < 0) { | 505 | if (status < 0) { |
527 | mlog_errno(status); | 506 | mlog_errno(status); |
528 | goto bail; | 507 | goto out; |
508 | } | ||
529 | } | 509 | } |
530 | bail: | 510 | out: |
531 | if (handle) | ||
532 | ocfs2_commit_trans(osb, handle); | ||
533 | 511 | ||
534 | mlog_exit(status); | 512 | mlog_exit(status); |
535 | return status; | 513 | return status; |