aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2008-10-26 18:06:24 -0400
committerMark Fasheh <mfasheh@suse.com>2008-11-10 12:51:47 -0500
commit80bcaf3469b8aefd316d4ceb27d9af7cfbb0b913 (patch)
tree250e38e24f88aeb5bd89b5f4fca3304207eec0f1 /fs/ocfs2
parent4c1bbf1ba631d7db61ce3462349a3f5d14ae3009 (diff)
ocfs2/xattr: Proper hash collision handle in bucket division
In ocfs2/xattr, we must make sure the xattrs which have the same hash value exist in the same bucket so that the search schema can work. But in the old implementation, when we want to extend a bucket, we just move half number of xattrs to the new bucket. This works in most cases, but if we are lucky enough we will move 2 xattrs into 2 different buckets. This means that an xattr from the previous bucket cannot be found anymore. This patch fix this problem by finding the right position during extending the bucket and extend an empty bucket if needed. Signed-off-by: Tao Ma <tao.ma@oracle.com> Cc: Joel Becker <joel.becker@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/xattr.c144
1 files changed, 115 insertions, 29 deletions
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a371c01942b1..f3ea7efb48c6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3110,25 +3110,73 @@ static int ocfs2_read_xattr_bucket(struct inode *inode,
3110} 3110}
3111 3111
3112/* 3112/*
3113 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). 3113 * Find the suitable pos when we divide a bucket into 2.
3114 * We have to make sure the xattrs with the same hash value exist
3115 * in the same bucket.
3116 *
3117 * If this ocfs2_xattr_header covers more than one hash value, find a
3118 * place where the hash value changes. Try to find the most even split.
3119 * The most common case is that all entries have different hash values,
3120 * and the first check we make will find a place to split.
3121 */
3122static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3123{
3124 struct ocfs2_xattr_entry *entries = xh->xh_entries;
3125 int count = le16_to_cpu(xh->xh_count);
3126 int delta, middle = count / 2;
3127
3128 /*
3129 * We start at the middle. Each step gets farther away in both
3130 * directions. We therefore hit the change in hash value
3131 * nearest to the middle. Note that this loop does not execute for
3132 * count < 2.
3133 */
3134 for (delta = 0; delta < middle; delta++) {
3135 /* Let's check delta earlier than middle */
3136 if (cmp_xe(&entries[middle - delta - 1],
3137 &entries[middle - delta]))
3138 return middle - delta;
3139
3140 /* For even counts, don't walk off the end */
3141 if ((middle + delta + 1) == count)
3142 continue;
3143
3144 /* Now try delta past middle */
3145 if (cmp_xe(&entries[middle + delta],
3146 &entries[middle + delta + 1]))
3147 return middle + delta + 1;
3148 }
3149
3150 /* Every entry had the same hash */
3151 return count;
3152}
3153
3154/*
3155 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3114 * first_hash will record the 1st hash of the new bucket. 3156 * first_hash will record the 1st hash of the new bucket.
3157 *
3158 * Normally half of the xattrs will be moved. But we have to make
3159 * sure that the xattrs with the same hash value are stored in the
3160 * same bucket. If all the xattrs in this bucket have the same hash
3161 * value, the new bucket will be initialized as an empty one and the
3162 * first_hash will be initialized as (hash_value+1).
3115 */ 3163 */
3116static int ocfs2_half_xattr_bucket(struct inode *inode, 3164static int ocfs2_divide_xattr_bucket(struct inode *inode,
3117 handle_t *handle, 3165 handle_t *handle,
3118 u64 blk, 3166 u64 blk,
3119 u64 new_blk, 3167 u64 new_blk,
3120 u32 *first_hash, 3168 u32 *first_hash,
3121 int new_bucket_head) 3169 int new_bucket_head)
3122{ 3170{
3123 int ret, i; 3171 int ret, i;
3124 u16 count, start, len, name_value_len, xe_len, name_offset; 3172 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3125 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3173 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3126 struct buffer_head **s_bhs, **t_bhs = NULL; 3174 struct buffer_head **s_bhs, **t_bhs = NULL;
3127 struct ocfs2_xattr_header *xh; 3175 struct ocfs2_xattr_header *xh;
3128 struct ocfs2_xattr_entry *xe; 3176 struct ocfs2_xattr_entry *xe;
3129 int blocksize = inode->i_sb->s_blocksize; 3177 int blocksize = inode->i_sb->s_blocksize;
3130 3178
3131 mlog(0, "move half of xattrs from bucket %llu to %llu\n", 3179 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3132 blk, new_blk); 3180 blk, new_blk);
3133 3181
3134 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3182 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
@@ -3171,14 +3219,35 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3171 } 3219 }
3172 } 3220 }
3173 3221
3222 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3223 count = le16_to_cpu(xh->xh_count);
3224 start = ocfs2_xattr_find_divide_pos(xh);
3225
3226 if (start == count) {
3227 xe = &xh->xh_entries[start-1];
3228
3229 /*
3230 * initialized a new empty bucket here.
3231 * The hash value is set as one larger than
3232 * that of the last entry in the previous bucket.
3233 */
3234 for (i = 0; i < blk_per_bucket; i++)
3235 memset(t_bhs[i]->b_data, 0, blocksize);
3236
3237 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3238 xh->xh_free_start = cpu_to_le16(blocksize);
3239 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3240 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3241
3242 goto set_num_buckets;
3243 }
3244
3174 /* copy the whole bucket to the new first. */ 3245 /* copy the whole bucket to the new first. */
3175 for (i = 0; i < blk_per_bucket; i++) 3246 for (i = 0; i < blk_per_bucket; i++)
3176 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); 3247 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3177 3248
3178 /* update the new bucket. */ 3249 /* update the new bucket. */
3179 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3250 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3180 count = le16_to_cpu(xh->xh_count);
3181 start = count / 2;
3182 3251
3183 /* 3252 /*
3184 * Calculate the total name/value len and xh_free_start for 3253 * Calculate the total name/value len and xh_free_start for
@@ -3235,6 +3304,7 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3235 xh->xh_free_start = xe->xe_name_offset; 3304 xh->xh_free_start = xe->xe_name_offset;
3236 } 3305 }
3237 3306
3307set_num_buckets:
3238 /* set xh->xh_num_buckets for the new xh. */ 3308 /* set xh->xh_num_buckets for the new xh. */
3239 if (new_bucket_head) 3309 if (new_bucket_head)
3240 xh->xh_num_buckets = cpu_to_le16(1); 3310 xh->xh_num_buckets = cpu_to_le16(1);
@@ -3252,9 +3322,13 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3252 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 3322 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3253 3323
3254 /* 3324 /*
3255 * Now only update the 1st block of the old bucket. 3325 * Now only update the 1st block of the old bucket. If we
3256 * Please note that the entry has been sorted already above. 3326 * just added a new empty bucket, there is no need to modify
3327 * it.
3257 */ 3328 */
3329 if (start == count)
3330 goto out;
3331
3258 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3332 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3259 memset(&xh->xh_entries[start], 0, 3333 memset(&xh->xh_entries[start], 0,
3260 sizeof(struct ocfs2_xattr_entry) * (count - start)); 3334 sizeof(struct ocfs2_xattr_entry) * (count - start));
@@ -3439,15 +3513,15 @@ out:
3439} 3513}
3440 3514
3441/* 3515/*
3442 * Move half of the xattrs in this cluster to the new cluster. 3516 * Move some xattrs in this cluster to the new cluster.
3443 * This function should only be called when bucket size == cluster size. 3517 * This function should only be called when bucket size == cluster size.
3444 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 3518 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3445 */ 3519 */
3446static int ocfs2_half_xattr_cluster(struct inode *inode, 3520static int ocfs2_divide_xattr_cluster(struct inode *inode,
3447 handle_t *handle, 3521 handle_t *handle,
3448 u64 prev_blk, 3522 u64 prev_blk,
3449 u64 new_blk, 3523 u64 new_blk,
3450 u32 *first_hash) 3524 u32 *first_hash)
3451{ 3525{
3452 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3526 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3453 int ret, credits = 2 * blk_per_bucket; 3527 int ret, credits = 2 * blk_per_bucket;
@@ -3461,8 +3535,8 @@ static int ocfs2_half_xattr_cluster(struct inode *inode,
3461 } 3535 }
3462 3536
3463 /* Move half of the xattr in start_blk to the next bucket. */ 3537 /* Move half of the xattr in start_blk to the next bucket. */
3464 return ocfs2_half_xattr_bucket(inode, handle, prev_blk, 3538 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3465 new_blk, first_hash, 1); 3539 new_blk, first_hash, 1);
3466} 3540}
3467 3541
3468/* 3542/*
@@ -3524,9 +3598,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3524 last_blk, new_blk, 3598 last_blk, new_blk,
3525 v_start); 3599 v_start);
3526 else { 3600 else {
3527 ret = ocfs2_half_xattr_cluster(inode, handle, 3601 ret = ocfs2_divide_xattr_cluster(inode, handle,
3528 last_blk, new_blk, 3602 last_blk, new_blk,
3529 v_start); 3603 v_start);
3530 3604
3531 if ((*header_bh)->b_blocknr == last_blk && extend) 3605 if ((*header_bh)->b_blocknr == last_blk && extend)
3532 *extend = 0; 3606 *extend = 0;
@@ -3743,8 +3817,8 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
3743 } 3817 }
3744 3818
3745 /* Move half of the xattr in start_blk to the next bucket. */ 3819 /* Move half of the xattr in start_blk to the next bucket. */
3746 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, 3820 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
3747 start_blk + blk_per_bucket, NULL, 0); 3821 start_blk + blk_per_bucket, NULL, 0);
3748 3822
3749 le16_add_cpu(&first_xh->xh_num_buckets, 1); 3823 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3750 ocfs2_journal_dirty(handle, first_bh); 3824 ocfs2_journal_dirty(handle, first_bh);
@@ -4435,11 +4509,21 @@ out:
4435 return ret; 4509 return ret;
4436} 4510}
4437 4511
4438/* check whether the xattr bucket is filled up with the same hash value. */ 4512/*
4513 * check whether the xattr bucket is filled up with the same hash value.
4514 * If we want to insert the xattr with the same hash, return -ENOSPC.
4515 * If we want to insert a xattr with different hash value, go ahead
4516 * and ocfs2_divide_xattr_bucket will handle this.
4517 */
4439static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 4518static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4440 struct ocfs2_xattr_bucket *bucket) 4519 struct ocfs2_xattr_bucket *bucket,
4520 const char *name)
4441{ 4521{
4442 struct ocfs2_xattr_header *xh = bucket->xh; 4522 struct ocfs2_xattr_header *xh = bucket->xh;
4523 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4524
4525 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4526 return 0;
4443 4527
4444 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 4528 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4445 xh->xh_entries[0].xe_name_hash) { 4529 xh->xh_entries[0].xe_name_hash) {
@@ -4562,7 +4646,9 @@ try_again:
4562 * one bucket's worth, so check it here whether we need to 4646 * one bucket's worth, so check it here whether we need to
4563 * add a new bucket for the insert. 4647 * add a new bucket for the insert.
4564 */ 4648 */
4565 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); 4649 ret = ocfs2_check_xattr_bucket_collision(inode,
4650 &xs->bucket,
4651 xi->name);
4566 if (ret) { 4652 if (ret) {
4567 mlog_errno(ret); 4653 mlog_errno(ret);
4568 goto out; 4654 goto out;