aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorJoe Thornber <ejt@redhat.com>2012-03-28 13:41:23 -0400
committerAlasdair G Kergon <agk@redhat.com>2012-03-28 13:41:23 -0400
commitb0988900bae9ecf968a8a8d086a9eec671a9517a (patch)
tree9dd34ec6f4563b78ac454f3691757dece46c1926 /drivers/md
parent6f94a4c45a6f744383f9f695dde019998db3df55 (diff)
dm persistent data: fix btree rebalancing after remove
When we remove an entry from a node we sometimes rebalance with its two neighbours. This wasn't being done correctly; in some cases entries have to move all the way from the right neighbour to the left neighbour, or vice versa. This patch pretty much re-writes the balancing code to fix it. This code is barely used currently; only when you delete a thin device, and then only if you have hundreds of them in the same pool. Once we have discard support, which removes mappings, this will be used much more heavily. Signed-off-by: Joe Thornber <ejt@redhat.com> Cc: stable@kernel.org Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c174
1 files changed, 99 insertions, 75 deletions
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 023fbc2d389e..1a35caf2563d 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -128,18 +128,9 @@ static void delete_at(struct node *n, unsigned index)
128 n->header.nr_entries = cpu_to_le32(nr_entries - 1); 128 n->header.nr_entries = cpu_to_le32(nr_entries - 1);
129} 129}
130 130
131static unsigned del_threshold(struct node *n)
132{
133 return le32_to_cpu(n->header.max_entries) / 3;
134}
135
136static unsigned merge_threshold(struct node *n) 131static unsigned merge_threshold(struct node *n)
137{ 132{
138 /* 133 return le32_to_cpu(n->header.max_entries) / 3;
139 * The extra one is because we know we're potentially going to
140 * delete an entry.
141 */
142 return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
143} 134}
144 135
145struct child { 136struct child {
@@ -188,6 +179,15 @@ static int exit_child(struct dm_btree_info *info, struct child *c)
188 179
189static void shift(struct node *left, struct node *right, int count) 180static void shift(struct node *left, struct node *right, int count)
190{ 181{
182 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
183 uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
184 uint32_t max_entries = le32_to_cpu(left->header.max_entries);
185 uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);
186
187 BUG_ON(max_entries != r_max_entries);
188 BUG_ON(nr_left - count > max_entries);
189 BUG_ON(nr_right + count > max_entries);
190
191 if (!count) 191 if (!count)
192 return; 192 return;
193 193
@@ -199,13 +199,8 @@ static void shift(struct node *left, struct node *right, int count)
199 node_shift(right, count); 199 node_shift(right, count);
200 } 200 }
201 201
202 left->header.nr_entries = 202 left->header.nr_entries = cpu_to_le32(nr_left - count);
203 cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count); 203 right->header.nr_entries = cpu_to_le32(nr_right + count);
204 BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries));
205
206 right->header.nr_entries =
207 cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
208 BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries));
209} 204}
210 205
211static void __rebalance2(struct dm_btree_info *info, struct node *parent, 206static void __rebalance2(struct dm_btree_info *info, struct node *parent,
@@ -215,8 +210,9 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent,
215 struct node *right = r->n; 210 struct node *right = r->n;
216 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 211 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
217 uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 212 uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
213 unsigned threshold = 2 * merge_threshold(left) + 1;
218 214
219 if (nr_left + nr_right <= merge_threshold(left)) { 215 if (nr_left + nr_right < threshold) {
220 /* 216 /*
221 * Merge 217 * Merge
222 */ 218 */
@@ -234,9 +230,6 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent,
234 * Rebalance. 230 * Rebalance.
235 */ 231 */
236 unsigned target_left = (nr_left + nr_right) / 2; 232 unsigned target_left = (nr_left + nr_right) / 2;
237 unsigned shift_ = nr_left - target_left;
238 BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_);
239 BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_);
240 shift(left, right, nr_left - target_left); 233 shift(left, right, nr_left - target_left);
241 *key_ptr(parent, r->index) = right->keys[0]; 234 *key_ptr(parent, r->index) = right->keys[0];
242 } 235 }
@@ -272,6 +265,84 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
272 return exit_child(info, &right); 265 return exit_child(info, &right);
273} 266}
274 267
268/*
269 * We dump as many entries from center as possible into left, then the rest
270 * in right, then rebalance2. This wastes some cpu, but I want something
271 * simple atm.
272 */
273static void delete_center_node(struct dm_btree_info *info, struct node *parent,
274 struct child *l, struct child *c, struct child *r,
275 struct node *left, struct node *center, struct node *right,
276 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
277{
278 uint32_t max_entries = le32_to_cpu(left->header.max_entries);
279 unsigned shift = min(max_entries - nr_left, nr_center);
280
281 BUG_ON(nr_left + shift > max_entries);
282 node_copy(left, center, -shift);
283 left->header.nr_entries = cpu_to_le32(nr_left + shift);
284
285 if (shift != nr_center) {
286 shift = nr_center - shift;
287 BUG_ON((nr_right + shift) > max_entries);
288 node_shift(right, shift);
289 node_copy(center, right, shift);
290 right->header.nr_entries = cpu_to_le32(nr_right + shift);
291 }
292 *key_ptr(parent, r->index) = right->keys[0];
293
294 delete_at(parent, c->index);
295 r->index--;
296
297 dm_tm_dec(info->tm, dm_block_location(c->block));
298 __rebalance2(info, parent, l, r);
299}
300
301/*
302 * Redistributes entries among 3 sibling nodes.
303 */
304static void redistribute3(struct dm_btree_info *info, struct node *parent,
305 struct child *l, struct child *c, struct child *r,
306 struct node *left, struct node *center, struct node *right,
307 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
308{
309 int s;
310 uint32_t max_entries = le32_to_cpu(left->header.max_entries);
311 unsigned target = (nr_left + nr_center + nr_right) / 3;
312 BUG_ON(target > max_entries);
313
314 if (nr_left < nr_right) {
315 s = nr_left - target;
316
317 if (s < 0 && nr_center < -s) {
318 /* not enough in central node */
319 shift(left, center, nr_center);
320 s = nr_center - target;
321 shift(left, right, s);
322 nr_right += s;
323 } else
324 shift(left, center, s);
325
326 shift(center, right, target - nr_right);
327
328 } else {
329 s = target - nr_right;
330 if (s > 0 && nr_center < s) {
331 /* not enough in central node */
332 shift(center, right, nr_center);
333 s = target - nr_center;
334 shift(left, right, s);
335 nr_left -= s;
336 } else
337 shift(center, right, s);
338
339 shift(left, center, nr_left - target);
340 }
341
342 *key_ptr(parent, c->index) = center->keys[0];
343 *key_ptr(parent, r->index) = right->keys[0];
344}
345
275static void __rebalance3(struct dm_btree_info *info, struct node *parent, 346static void __rebalance3(struct dm_btree_info *info, struct node *parent,
276 struct child *l, struct child *c, struct child *r) 347 struct child *l, struct child *c, struct child *r)
277{ 348{
@@ -282,62 +353,18 @@ static void __rebalance3(struct dm_btree_info *info, struct node *parent,
282 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 353 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
283 uint32_t nr_center = le32_to_cpu(center->header.nr_entries); 354 uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
284 uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 355 uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
285 uint32_t max_entries = le32_to_cpu(left->header.max_entries);
286 356
287 unsigned target; 357 unsigned threshold = merge_threshold(left) * 4 + 1;
288 358
289 BUG_ON(left->header.max_entries != center->header.max_entries); 359 BUG_ON(left->header.max_entries != center->header.max_entries);
290 BUG_ON(center->header.max_entries != right->header.max_entries); 360 BUG_ON(center->header.max_entries != right->header.max_entries);
291 361
292 if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) { 362 if ((nr_left + nr_center + nr_right) < threshold)
293 /* 363 delete_center_node(info, parent, l, c, r, left, center, right,
294 * Delete center node: 364 nr_left, nr_center, nr_right);
295 * 365 else
296 * We dump as many entries from center as possible into 366 redistribute3(info, parent, l, c, r, left, center, right,
297 * left, then the rest in right, then rebalance2. This 367 nr_left, nr_center, nr_right);
298 * wastes some cpu, but I want something simple atm.
299 */
300 unsigned shift = min(max_entries - nr_left, nr_center);
301
302 BUG_ON(nr_left + shift > max_entries);
303 node_copy(left, center, -shift);
304 left->header.nr_entries = cpu_to_le32(nr_left + shift);
305
306 if (shift != nr_center) {
307 shift = nr_center - shift;
308 BUG_ON((nr_right + shift) >= max_entries);
309 node_shift(right, shift);
310 node_copy(center, right, shift);
311 right->header.nr_entries = cpu_to_le32(nr_right + shift);
312 }
313 *key_ptr(parent, r->index) = right->keys[0];
314
315 delete_at(parent, c->index);
316 r->index--;
317
318 dm_tm_dec(info->tm, dm_block_location(c->block));
319 __rebalance2(info, parent, l, r);
320
321 return;
322 }
323
324 /*
325 * Rebalance
326 */
327 target = (nr_left + nr_center + nr_right) / 3;
328 BUG_ON(target > max_entries);
329
330 /*
331 * Adjust the left node
332 */
333 shift(left, center, nr_left - target);
334
335 /*
336 * Adjust the right node
337 */
338 shift(center, right, target - nr_right);
339 *key_ptr(parent, c->index) = center->keys[0];
340 *key_ptr(parent, r->index) = right->keys[0];
341} 368}
342 369
343static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, 370static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
@@ -441,9 +468,6 @@ static int rebalance_children(struct shadow_spine *s,
441 if (r) 468 if (r)
442 return r; 469 return r;
443 470
444 if (child_entries > del_threshold(n))
445 return 0;
446
447 has_left_sibling = i > 0; 471 has_left_sibling = i > 0;
448 has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); 472 has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
449 473