diff options
author | Joe Thornber <ejt@redhat.com> | 2013-03-01 17:45:51 -0500 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2013-03-01 17:45:51 -0500 |
commit | 6513c29f44f2cc970c0e9fecfe5a6526c3e73025 (patch) | |
tree | 5c501dceffb8a4c5c0a70bde68b084a171fee861 /drivers/md | |
parent | 025b96853fe0bdc977d88b4242ca5e1f19d9bb66 (diff) |
dm persistent data: add transactional array
Add a transactional array.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/persistent-data/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.c | 808 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.h | 166 |
3 files changed, 975 insertions, 0 deletions
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index d8e7cb767c1e..ebd8d80d529c 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile | |||
@@ -1,5 +1,6 @@ | |||
1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o | 1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o |
2 | dm-persistent-data-objs := \ | 2 | dm-persistent-data-objs := \ |
3 | dm-array.o \ | ||
3 | dm-block-manager.o \ | 4 | dm-block-manager.o \ |
4 | dm-space-map-common.o \ | 5 | dm-space-map-common.o \ |
5 | dm-space-map-disk.o \ | 6 | dm-space-map-disk.o \ |
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c new file mode 100644 index 000000000000..172147eb1d40 --- /dev/null +++ b/drivers/md/persistent-data/dm-array.c | |||
@@ -0,0 +1,808 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include "dm-array.h" | ||
8 | #include "dm-space-map.h" | ||
9 | #include "dm-transaction-manager.h" | ||
10 | |||
11 | #include <linux/export.h> | ||
12 | #include <linux/device-mapper.h> | ||
13 | |||
14 | #define DM_MSG_PREFIX "array" | ||
15 | |||
16 | /*----------------------------------------------------------------*/ | ||
17 | |||
18 | /* | ||
19 | * The array is implemented as a fully populated btree, which points to | ||
20 | * blocks that contain the packed values. This is more space efficient | ||
21 | * than just using a btree since we don't store 1 key per value. | ||
22 | */ | ||
23 | struct array_block { | ||
24 | __le32 csum; | ||
25 | __le32 max_entries; | ||
26 | __le32 nr_entries; | ||
27 | __le32 value_size; | ||
28 | __le64 blocknr; /* Block this node is supposed to live in. */ | ||
29 | } __packed; | ||
30 | |||
31 | /*----------------------------------------------------------------*/ | ||
32 | |||
33 | /* | ||
34 | * Validator methods. As usual we calculate a checksum, and also write the | ||
35 | * block location into the header (paranoia about ssds remapping areas by | ||
36 | * mistake). | ||
37 | */ | ||
38 | #define CSUM_XOR 595846735 | ||
39 | |||
40 | static void array_block_prepare_for_write(struct dm_block_validator *v, | ||
41 | struct dm_block *b, | ||
42 | size_t size_of_block) | ||
43 | { | ||
44 | struct array_block *bh_le = dm_block_data(b); | ||
45 | |||
46 | bh_le->blocknr = cpu_to_le64(dm_block_location(b)); | ||
47 | bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, | ||
48 | size_of_block - sizeof(__le32), | ||
49 | CSUM_XOR)); | ||
50 | } | ||
51 | |||
52 | static int array_block_check(struct dm_block_validator *v, | ||
53 | struct dm_block *b, | ||
54 | size_t size_of_block) | ||
55 | { | ||
56 | struct array_block *bh_le = dm_block_data(b); | ||
57 | __le32 csum_disk; | ||
58 | |||
59 | if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) { | ||
60 | DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu", | ||
61 | (unsigned long long) le64_to_cpu(bh_le->blocknr), | ||
62 | (unsigned long long) dm_block_location(b)); | ||
63 | return -ENOTBLK; | ||
64 | } | ||
65 | |||
66 | csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, | ||
67 | size_of_block - sizeof(__le32), | ||
68 | CSUM_XOR)); | ||
69 | if (csum_disk != bh_le->csum) { | ||
70 | DMERR_LIMIT("array_block_check failed: csum %u != wanted %u", | ||
71 | (unsigned) le32_to_cpu(csum_disk), | ||
72 | (unsigned) le32_to_cpu(bh_le->csum)); | ||
73 | return -EILSEQ; | ||
74 | } | ||
75 | |||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static struct dm_block_validator array_validator = { | ||
80 | .name = "array", | ||
81 | .prepare_for_write = array_block_prepare_for_write, | ||
82 | .check = array_block_check | ||
83 | }; | ||
84 | |||
85 | /*----------------------------------------------------------------*/ | ||
86 | |||
87 | /* | ||
88 | * Functions for manipulating the array blocks. | ||
89 | */ | ||
90 | |||
91 | /* | ||
92 | * Returns a pointer to a value within an array block. | ||
93 | * | ||
94 | * index - The index into _this_ specific block. | ||
95 | */ | ||
96 | static void *element_at(struct dm_array_info *info, struct array_block *ab, | ||
97 | unsigned index) | ||
98 | { | ||
99 | unsigned char *entry = (unsigned char *) (ab + 1); | ||
100 | |||
101 | entry += index * info->value_type.size; | ||
102 | |||
103 | return entry; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Utility function that calls one of the value_type methods on every value | ||
108 | * in an array block. | ||
109 | */ | ||
110 | static void on_entries(struct dm_array_info *info, struct array_block *ab, | ||
111 | void (*fn)(void *, const void *)) | ||
112 | { | ||
113 | unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); | ||
114 | |||
115 | for (i = 0; i < nr_entries; i++) | ||
116 | fn(info->value_type.context, element_at(info, ab, i)); | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Increment every value in an array block. | ||
121 | */ | ||
122 | static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab) | ||
123 | { | ||
124 | struct dm_btree_value_type *vt = &info->value_type; | ||
125 | |||
126 | if (vt->inc) | ||
127 | on_entries(info, ab, vt->inc); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Decrement every value in an array block. | ||
132 | */ | ||
133 | static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab) | ||
134 | { | ||
135 | struct dm_btree_value_type *vt = &info->value_type; | ||
136 | |||
137 | if (vt->dec) | ||
138 | on_entries(info, ab, vt->dec); | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Each array block can hold this many values. | ||
143 | */ | ||
144 | static uint32_t calc_max_entries(size_t value_size, size_t size_of_block) | ||
145 | { | ||
146 | return (size_of_block - sizeof(struct array_block)) / value_size; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Allocate a new array block. The caller will need to unlock block. | ||
151 | */ | ||
152 | static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, | ||
153 | uint32_t max_entries, | ||
154 | struct dm_block **block, struct array_block **ab) | ||
155 | { | ||
156 | int r; | ||
157 | |||
158 | r = dm_tm_new_block(info->btree_info.tm, &array_validator, block); | ||
159 | if (r) | ||
160 | return r; | ||
161 | |||
162 | (*ab) = dm_block_data(*block); | ||
163 | (*ab)->max_entries = cpu_to_le32(max_entries); | ||
164 | (*ab)->nr_entries = cpu_to_le32(0); | ||
165 | (*ab)->value_size = cpu_to_le32(info->value_type.size); | ||
166 | |||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Pad an array block out with a particular value. Every instance will | ||
172 | * cause an increment of the value_type. new_nr must always be more than | ||
173 | * the current number of entries. | ||
174 | */ | ||
175 | static void fill_ablock(struct dm_array_info *info, struct array_block *ab, | ||
176 | const void *value, unsigned new_nr) | ||
177 | { | ||
178 | unsigned i; | ||
179 | uint32_t nr_entries; | ||
180 | struct dm_btree_value_type *vt = &info->value_type; | ||
181 | |||
182 | BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); | ||
183 | BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); | ||
184 | |||
185 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
186 | for (i = nr_entries; i < new_nr; i++) { | ||
187 | if (vt->inc) | ||
188 | vt->inc(vt->context, value); | ||
189 | memcpy(element_at(info, ab, i), value, vt->size); | ||
190 | } | ||
191 | ab->nr_entries = cpu_to_le32(new_nr); | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * Remove some entries from the back of an array block. Every value | ||
196 | * removed will be decremented. new_nr must be <= the current number of | ||
197 | * entries. | ||
198 | */ | ||
199 | static void trim_ablock(struct dm_array_info *info, struct array_block *ab, | ||
200 | unsigned new_nr) | ||
201 | { | ||
202 | unsigned i; | ||
203 | uint32_t nr_entries; | ||
204 | struct dm_btree_value_type *vt = &info->value_type; | ||
205 | |||
206 | BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); | ||
207 | BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); | ||
208 | |||
209 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
210 | for (i = nr_entries; i > new_nr; i--) | ||
211 | if (vt->dec) | ||
212 | vt->dec(vt->context, element_at(info, ab, i - 1)); | ||
213 | ab->nr_entries = cpu_to_le32(new_nr); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Read locks a block, and coerces it to an array block. The caller must | ||
218 | * unlock 'block' when finished. | ||
219 | */ | ||
220 | static int get_ablock(struct dm_array_info *info, dm_block_t b, | ||
221 | struct dm_block **block, struct array_block **ab) | ||
222 | { | ||
223 | int r; | ||
224 | |||
225 | r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block); | ||
226 | if (r) | ||
227 | return r; | ||
228 | |||
229 | *ab = dm_block_data(*block); | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Unlocks an array block. | ||
235 | */ | ||
236 | static int unlock_ablock(struct dm_array_info *info, struct dm_block *block) | ||
237 | { | ||
238 | return dm_tm_unlock(info->btree_info.tm, block); | ||
239 | } | ||
240 | |||
241 | /*----------------------------------------------------------------*/ | ||
242 | |||
243 | /* | ||
244 | * Btree manipulation. | ||
245 | */ | ||
246 | |||
247 | /* | ||
248 | * Looks up an array block in the btree, and then read locks it. | ||
249 | * | ||
250 | * index is the index of the index of the array_block, (ie. the array index | ||
251 | * / max_entries). | ||
252 | */ | ||
253 | static int lookup_ablock(struct dm_array_info *info, dm_block_t root, | ||
254 | unsigned index, struct dm_block **block, | ||
255 | struct array_block **ab) | ||
256 | { | ||
257 | int r; | ||
258 | uint64_t key = index; | ||
259 | __le64 block_le; | ||
260 | |||
261 | r = dm_btree_lookup(&info->btree_info, root, &key, &block_le); | ||
262 | if (r) | ||
263 | return r; | ||
264 | |||
265 | return get_ablock(info, le64_to_cpu(block_le), block, ab); | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Insert an array block into the btree. The block is _not_ unlocked. | ||
270 | */ | ||
271 | static int insert_ablock(struct dm_array_info *info, uint64_t index, | ||
272 | struct dm_block *block, dm_block_t *root) | ||
273 | { | ||
274 | __le64 block_le = cpu_to_le64(dm_block_location(block)); | ||
275 | |||
276 | __dm_bless_for_disk(block_le); | ||
277 | return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * Looks up an array block in the btree. Then shadows it, and updates the | ||
282 | * btree to point to this new shadow. 'root' is an input/output parameter | ||
283 | * for both the current root block, and the new one. | ||
284 | */ | ||
285 | static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, | ||
286 | unsigned index, struct dm_block **block, | ||
287 | struct array_block **ab) | ||
288 | { | ||
289 | int r, inc; | ||
290 | uint64_t key = index; | ||
291 | dm_block_t b; | ||
292 | __le64 block_le; | ||
293 | |||
294 | /* | ||
295 | * lookup | ||
296 | */ | ||
297 | r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); | ||
298 | if (r) | ||
299 | return r; | ||
300 | b = le64_to_cpu(block_le); | ||
301 | |||
302 | /* | ||
303 | * shadow | ||
304 | */ | ||
305 | r = dm_tm_shadow_block(info->btree_info.tm, b, | ||
306 | &array_validator, block, &inc); | ||
307 | if (r) | ||
308 | return r; | ||
309 | |||
310 | *ab = dm_block_data(*block); | ||
311 | if (inc) | ||
312 | inc_ablock_entries(info, *ab); | ||
313 | |||
314 | /* | ||
315 | * Reinsert. | ||
316 | * | ||
317 | * The shadow op will often be a noop. Only insert if it really | ||
318 | * copied data. | ||
319 | */ | ||
320 | if (dm_block_location(*block) != b) | ||
321 | r = insert_ablock(info, index, *block, root); | ||
322 | |||
323 | return r; | ||
324 | } | ||
325 | |||
326 | /* | ||
327 | * Allocate an new array block, and fill it with some values. | ||
328 | */ | ||
329 | static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block, | ||
330 | uint32_t max_entries, | ||
331 | unsigned block_index, uint32_t nr, | ||
332 | const void *value, dm_block_t *root) | ||
333 | { | ||
334 | int r; | ||
335 | struct dm_block *block; | ||
336 | struct array_block *ab; | ||
337 | |||
338 | r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); | ||
339 | if (r) | ||
340 | return r; | ||
341 | |||
342 | fill_ablock(info, ab, value, nr); | ||
343 | r = insert_ablock(info, block_index, block, root); | ||
344 | unlock_ablock(info, block); | ||
345 | |||
346 | return r; | ||
347 | } | ||
348 | |||
349 | static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block, | ||
350 | unsigned begin_block, unsigned end_block, | ||
351 | unsigned max_entries, const void *value, | ||
352 | dm_block_t *root) | ||
353 | { | ||
354 | int r = 0; | ||
355 | |||
356 | for (; !r && begin_block != end_block; begin_block++) | ||
357 | r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root); | ||
358 | |||
359 | return r; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * There are a bunch of functions involved with resizing an array. This | ||
364 | * structure holds information that commonly needed by them. Purely here | ||
365 | * to reduce parameter count. | ||
366 | */ | ||
367 | struct resize { | ||
368 | /* | ||
369 | * Describes the array. | ||
370 | */ | ||
371 | struct dm_array_info *info; | ||
372 | |||
373 | /* | ||
374 | * The current root of the array. This gets updated. | ||
375 | */ | ||
376 | dm_block_t root; | ||
377 | |||
378 | /* | ||
379 | * Metadata block size. Used to calculate the nr entries in an | ||
380 | * array block. | ||
381 | */ | ||
382 | size_t size_of_block; | ||
383 | |||
384 | /* | ||
385 | * Maximum nr entries in an array block. | ||
386 | */ | ||
387 | unsigned max_entries; | ||
388 | |||
389 | /* | ||
390 | * nr of completely full blocks in the array. | ||
391 | * | ||
392 | * 'old' refers to before the resize, 'new' after. | ||
393 | */ | ||
394 | unsigned old_nr_full_blocks, new_nr_full_blocks; | ||
395 | |||
396 | /* | ||
397 | * Number of entries in the final block. 0 iff only full blocks in | ||
398 | * the array. | ||
399 | */ | ||
400 | unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block; | ||
401 | |||
402 | /* | ||
403 | * The default value used when growing the array. | ||
404 | */ | ||
405 | const void *value; | ||
406 | }; | ||
407 | |||
408 | /* | ||
409 | * Removes a consecutive set of array blocks from the btree. The values | ||
410 | * in block are decremented as a side effect of the btree remove. | ||
411 | * | ||
412 | * begin_index - the index of the first array block to remove. | ||
413 | * end_index - the one-past-the-end value. ie. this block is not removed. | ||
414 | */ | ||
415 | static int drop_blocks(struct resize *resize, unsigned begin_index, | ||
416 | unsigned end_index) | ||
417 | { | ||
418 | int r; | ||
419 | |||
420 | while (begin_index != end_index) { | ||
421 | uint64_t key = begin_index++; | ||
422 | r = dm_btree_remove(&resize->info->btree_info, resize->root, | ||
423 | &key, &resize->root); | ||
424 | if (r) | ||
425 | return r; | ||
426 | } | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Calculates how many blocks are needed for the array. | ||
433 | */ | ||
434 | static unsigned total_nr_blocks_needed(unsigned nr_full_blocks, | ||
435 | unsigned nr_entries_in_last_block) | ||
436 | { | ||
437 | return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0); | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * Shrink an array. | ||
442 | */ | ||
443 | static int shrink(struct resize *resize) | ||
444 | { | ||
445 | int r; | ||
446 | unsigned begin, end; | ||
447 | struct dm_block *block; | ||
448 | struct array_block *ab; | ||
449 | |||
450 | /* | ||
451 | * Lose some blocks from the back? | ||
452 | */ | ||
453 | if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) { | ||
454 | begin = total_nr_blocks_needed(resize->new_nr_full_blocks, | ||
455 | resize->new_nr_entries_in_last_block); | ||
456 | end = total_nr_blocks_needed(resize->old_nr_full_blocks, | ||
457 | resize->old_nr_entries_in_last_block); | ||
458 | |||
459 | r = drop_blocks(resize, begin, end); | ||
460 | if (r) | ||
461 | return r; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * Trim the new tail block | ||
466 | */ | ||
467 | if (resize->new_nr_entries_in_last_block) { | ||
468 | r = shadow_ablock(resize->info, &resize->root, | ||
469 | resize->new_nr_full_blocks, &block, &ab); | ||
470 | if (r) | ||
471 | return r; | ||
472 | |||
473 | trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block); | ||
474 | unlock_ablock(resize->info, block); | ||
475 | } | ||
476 | |||
477 | return 0; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Grow an array. | ||
482 | */ | ||
483 | static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries) | ||
484 | { | ||
485 | int r; | ||
486 | struct dm_block *block; | ||
487 | struct array_block *ab; | ||
488 | |||
489 | r = shadow_ablock(resize->info, &resize->root, | ||
490 | resize->old_nr_full_blocks, &block, &ab); | ||
491 | if (r) | ||
492 | return r; | ||
493 | |||
494 | fill_ablock(resize->info, ab, resize->value, new_nr_entries); | ||
495 | unlock_ablock(resize->info, block); | ||
496 | |||
497 | return r; | ||
498 | } | ||
499 | |||
500 | static int grow_add_tail_block(struct resize *resize) | ||
501 | { | ||
502 | return insert_new_ablock(resize->info, resize->size_of_block, | ||
503 | resize->max_entries, | ||
504 | resize->new_nr_full_blocks, | ||
505 | resize->new_nr_entries_in_last_block, | ||
506 | resize->value, &resize->root); | ||
507 | } | ||
508 | |||
509 | static int grow_needs_more_blocks(struct resize *resize) | ||
510 | { | ||
511 | int r; | ||
512 | |||
513 | if (resize->old_nr_entries_in_last_block > 0) { | ||
514 | r = grow_extend_tail_block(resize, resize->max_entries); | ||
515 | if (r) | ||
516 | return r; | ||
517 | } | ||
518 | |||
519 | r = insert_full_ablocks(resize->info, resize->size_of_block, | ||
520 | resize->old_nr_full_blocks, | ||
521 | resize->new_nr_full_blocks, | ||
522 | resize->max_entries, resize->value, | ||
523 | &resize->root); | ||
524 | if (r) | ||
525 | return r; | ||
526 | |||
527 | if (resize->new_nr_entries_in_last_block) | ||
528 | r = grow_add_tail_block(resize); | ||
529 | |||
530 | return r; | ||
531 | } | ||
532 | |||
533 | static int grow(struct resize *resize) | ||
534 | { | ||
535 | if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) | ||
536 | return grow_needs_more_blocks(resize); | ||
537 | |||
538 | else if (resize->old_nr_entries_in_last_block) | ||
539 | return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block); | ||
540 | |||
541 | else | ||
542 | return grow_add_tail_block(resize); | ||
543 | } | ||
544 | |||
545 | /*----------------------------------------------------------------*/ | ||
546 | |||
547 | /* | ||
548 | * These are the value_type functions for the btree elements, which point | ||
549 | * to array blocks. | ||
550 | */ | ||
551 | static void block_inc(void *context, const void *value) | ||
552 | { | ||
553 | __le64 block_le; | ||
554 | struct dm_array_info *info = context; | ||
555 | |||
556 | memcpy(&block_le, value, sizeof(block_le)); | ||
557 | dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); | ||
558 | } | ||
559 | |||
560 | static void block_dec(void *context, const void *value) | ||
561 | { | ||
562 | int r; | ||
563 | uint64_t b; | ||
564 | __le64 block_le; | ||
565 | uint32_t ref_count; | ||
566 | struct dm_block *block; | ||
567 | struct array_block *ab; | ||
568 | struct dm_array_info *info = context; | ||
569 | |||
570 | memcpy(&block_le, value, sizeof(block_le)); | ||
571 | b = le64_to_cpu(block_le); | ||
572 | |||
573 | r = dm_tm_ref(info->btree_info.tm, b, &ref_count); | ||
574 | if (r) { | ||
575 | DMERR_LIMIT("couldn't get reference count for block %llu", | ||
576 | (unsigned long long) b); | ||
577 | return; | ||
578 | } | ||
579 | |||
580 | if (ref_count == 1) { | ||
581 | /* | ||
582 | * We're about to drop the last reference to this ablock. | ||
583 | * So we need to decrement the ref count of the contents. | ||
584 | */ | ||
585 | r = get_ablock(info, b, &block, &ab); | ||
586 | if (r) { | ||
587 | DMERR_LIMIT("couldn't get array block %llu", | ||
588 | (unsigned long long) b); | ||
589 | return; | ||
590 | } | ||
591 | |||
592 | dec_ablock_entries(info, ab); | ||
593 | unlock_ablock(info, block); | ||
594 | } | ||
595 | |||
596 | dm_tm_dec(info->btree_info.tm, b); | ||
597 | } | ||
598 | |||
599 | static int block_equal(void *context, const void *value1, const void *value2) | ||
600 | { | ||
601 | return !memcmp(value1, value2, sizeof(__le64)); | ||
602 | } | ||
603 | |||
604 | /*----------------------------------------------------------------*/ | ||
605 | |||
606 | void dm_array_info_init(struct dm_array_info *info, | ||
607 | struct dm_transaction_manager *tm, | ||
608 | struct dm_btree_value_type *vt) | ||
609 | { | ||
610 | struct dm_btree_value_type *bvt = &info->btree_info.value_type; | ||
611 | |||
612 | memcpy(&info->value_type, vt, sizeof(info->value_type)); | ||
613 | info->btree_info.tm = tm; | ||
614 | info->btree_info.levels = 1; | ||
615 | |||
616 | bvt->context = info; | ||
617 | bvt->size = sizeof(__le64); | ||
618 | bvt->inc = block_inc; | ||
619 | bvt->dec = block_dec; | ||
620 | bvt->equal = block_equal; | ||
621 | } | ||
622 | EXPORT_SYMBOL_GPL(dm_array_info_init); | ||
623 | |||
624 | int dm_array_empty(struct dm_array_info *info, dm_block_t *root) | ||
625 | { | ||
626 | return dm_btree_empty(&info->btree_info, root); | ||
627 | } | ||
628 | EXPORT_SYMBOL_GPL(dm_array_empty); | ||
629 | |||
630 | static int array_resize(struct dm_array_info *info, dm_block_t root, | ||
631 | uint32_t old_size, uint32_t new_size, | ||
632 | const void *value, dm_block_t *new_root) | ||
633 | { | ||
634 | int r; | ||
635 | struct resize resize; | ||
636 | |||
637 | if (old_size == new_size) | ||
638 | return 0; | ||
639 | |||
640 | resize.info = info; | ||
641 | resize.root = root; | ||
642 | resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
643 | resize.max_entries = calc_max_entries(info->value_type.size, | ||
644 | resize.size_of_block); | ||
645 | |||
646 | resize.old_nr_full_blocks = old_size / resize.max_entries; | ||
647 | resize.old_nr_entries_in_last_block = old_size % resize.max_entries; | ||
648 | resize.new_nr_full_blocks = new_size / resize.max_entries; | ||
649 | resize.new_nr_entries_in_last_block = new_size % resize.max_entries; | ||
650 | resize.value = value; | ||
651 | |||
652 | r = ((new_size > old_size) ? grow : shrink)(&resize); | ||
653 | if (r) | ||
654 | return r; | ||
655 | |||
656 | *new_root = resize.root; | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | int dm_array_resize(struct dm_array_info *info, dm_block_t root, | ||
661 | uint32_t old_size, uint32_t new_size, | ||
662 | const void *value, dm_block_t *new_root) | ||
663 | __dm_written_to_disk(value) | ||
664 | { | ||
665 | int r = array_resize(info, root, old_size, new_size, value, new_root); | ||
666 | __dm_unbless_for_disk(value); | ||
667 | return r; | ||
668 | } | ||
669 | EXPORT_SYMBOL_GPL(dm_array_resize); | ||
670 | |||
671 | int dm_array_del(struct dm_array_info *info, dm_block_t root) | ||
672 | { | ||
673 | return dm_btree_del(&info->btree_info, root); | ||
674 | } | ||
675 | EXPORT_SYMBOL_GPL(dm_array_del); | ||
676 | |||
677 | int dm_array_get_value(struct dm_array_info *info, dm_block_t root, | ||
678 | uint32_t index, void *value_le) | ||
679 | { | ||
680 | int r; | ||
681 | struct dm_block *block; | ||
682 | struct array_block *ab; | ||
683 | size_t size_of_block; | ||
684 | unsigned entry, max_entries; | ||
685 | |||
686 | size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
687 | max_entries = calc_max_entries(info->value_type.size, size_of_block); | ||
688 | |||
689 | r = lookup_ablock(info, root, index / max_entries, &block, &ab); | ||
690 | if (r) | ||
691 | return r; | ||
692 | |||
693 | entry = index % max_entries; | ||
694 | if (entry >= le32_to_cpu(ab->nr_entries)) | ||
695 | r = -ENODATA; | ||
696 | else | ||
697 | memcpy(value_le, element_at(info, ab, entry), | ||
698 | info->value_type.size); | ||
699 | |||
700 | unlock_ablock(info, block); | ||
701 | return r; | ||
702 | } | ||
703 | EXPORT_SYMBOL_GPL(dm_array_get_value); | ||
704 | |||
705 | static int array_set_value(struct dm_array_info *info, dm_block_t root, | ||
706 | uint32_t index, const void *value, dm_block_t *new_root) | ||
707 | { | ||
708 | int r; | ||
709 | struct dm_block *block; | ||
710 | struct array_block *ab; | ||
711 | size_t size_of_block; | ||
712 | unsigned max_entries; | ||
713 | unsigned entry; | ||
714 | void *old_value; | ||
715 | struct dm_btree_value_type *vt = &info->value_type; | ||
716 | |||
717 | size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
718 | max_entries = calc_max_entries(info->value_type.size, size_of_block); | ||
719 | |||
720 | r = shadow_ablock(info, &root, index / max_entries, &block, &ab); | ||
721 | if (r) | ||
722 | return r; | ||
723 | *new_root = root; | ||
724 | |||
725 | entry = index % max_entries; | ||
726 | if (entry >= le32_to_cpu(ab->nr_entries)) { | ||
727 | r = -ENODATA; | ||
728 | goto out; | ||
729 | } | ||
730 | |||
731 | old_value = element_at(info, ab, entry); | ||
732 | if (vt->dec && | ||
733 | (!vt->equal || !vt->equal(vt->context, old_value, value))) { | ||
734 | vt->dec(vt->context, old_value); | ||
735 | if (vt->inc) | ||
736 | vt->inc(vt->context, value); | ||
737 | } | ||
738 | |||
739 | memcpy(old_value, value, info->value_type.size); | ||
740 | |||
741 | out: | ||
742 | unlock_ablock(info, block); | ||
743 | return r; | ||
744 | } | ||
745 | |||
746 | int dm_array_set_value(struct dm_array_info *info, dm_block_t root, | ||
747 | uint32_t index, const void *value, dm_block_t *new_root) | ||
748 | __dm_written_to_disk(value) | ||
749 | { | ||
750 | int r; | ||
751 | |||
752 | r = array_set_value(info, root, index, value, new_root); | ||
753 | __dm_unbless_for_disk(value); | ||
754 | return r; | ||
755 | } | ||
756 | EXPORT_SYMBOL_GPL(dm_array_set_value); | ||
757 | |||
758 | struct walk_info { | ||
759 | struct dm_array_info *info; | ||
760 | int (*fn)(void *context, uint64_t key, void *leaf); | ||
761 | void *context; | ||
762 | }; | ||
763 | |||
764 | static int walk_ablock(void *context, uint64_t *keys, void *leaf) | ||
765 | { | ||
766 | struct walk_info *wi = context; | ||
767 | |||
768 | int r; | ||
769 | unsigned i; | ||
770 | __le64 block_le; | ||
771 | unsigned nr_entries, max_entries; | ||
772 | struct dm_block *block; | ||
773 | struct array_block *ab; | ||
774 | |||
775 | memcpy(&block_le, leaf, sizeof(block_le)); | ||
776 | r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab); | ||
777 | if (r) | ||
778 | return r; | ||
779 | |||
780 | max_entries = le32_to_cpu(ab->max_entries); | ||
781 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
782 | for (i = 0; i < nr_entries; i++) { | ||
783 | r = wi->fn(wi->context, keys[0] * max_entries + i, | ||
784 | element_at(wi->info, ab, i)); | ||
785 | |||
786 | if (r) | ||
787 | break; | ||
788 | } | ||
789 | |||
790 | unlock_ablock(wi->info, block); | ||
791 | return r; | ||
792 | } | ||
793 | |||
794 | int dm_array_walk(struct dm_array_info *info, dm_block_t root, | ||
795 | int (*fn)(void *, uint64_t key, void *leaf), | ||
796 | void *context) | ||
797 | { | ||
798 | struct walk_info wi; | ||
799 | |||
800 | wi.info = info; | ||
801 | wi.fn = fn; | ||
802 | wi.context = context; | ||
803 | |||
804 | return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi); | ||
805 | } | ||
806 | EXPORT_SYMBOL_GPL(dm_array_walk); | ||
807 | |||
808 | /*----------------------------------------------------------------*/ | ||
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h new file mode 100644 index 000000000000..ea177d6fa58f --- /dev/null +++ b/drivers/md/persistent-data/dm-array.h | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | #ifndef _LINUX_DM_ARRAY_H | ||
7 | #define _LINUX_DM_ARRAY_H | ||
8 | |||
9 | #include "dm-btree.h" | ||
10 | |||
11 | /*----------------------------------------------------------------*/ | ||
12 | |||
13 | /* | ||
14 | * The dm-array is a persistent version of an array. It packs the data | ||
15 | * more efficiently than a btree which will result in less disk space use, | ||
16 | * and a performance boost. The element get and set operations are still | ||
17 | * O(ln(n)), but with a much smaller constant. | ||
18 | * | ||
19 | * The value type structure is reused from the btree type to support proper | ||
20 | * reference counting of values. | ||
21 | * | ||
22 | * The arrays implicitly know their length, and bounds are checked for | ||
23 | * lookups and updated. It doesn't store this in an accessible place | ||
24 | * because it would waste a whole metadata block. Make sure you store the | ||
25 | * size along with the array root in your encompassing data. | ||
26 | * | ||
27 | * Array entries are indexed via an unsigned integer starting from zero. | ||
28 | * Arrays are not sparse; if you resize an array to have 'n' entries then | ||
29 | * 'n - 1' will be the last valid index. | ||
30 | * | ||
31 | * Typical use: | ||
32 | * | ||
33 | * a) initialise a dm_array_info structure. This describes the array | ||
34 | * values and ties it into a specific transaction manager. It holds no | ||
35 | * instance data; the same info can be used for many similar arrays if | ||
36 | * you wish. | ||
37 | * | ||
38 | * b) Get yourself a root. The root is the index of a block of data on the | ||
39 | * disk that holds a particular instance of an array. You may have a | ||
40 | * pre existing root in your metadata that you wish to use, or you may | ||
41 | * want to create a brand new, empty array with dm_array_empty(). | ||
42 | * | ||
43 | * Like the other data structures in this library, dm_array objects are | ||
44 | * immutable between transactions. Update functions will return you the | ||
45 | * root for a _new_ array. If you've incremented the old root, via | ||
46 | * dm_tm_inc(), before calling the update function you may continue to use | ||
47 | * it in parallel with the new root. | ||
48 | * | ||
49 | * c) resize an array with dm_array_resize(). | ||
50 | * | ||
51 | * d) Get a value from the array with dm_array_get_value(). | ||
52 | * | ||
53 | * e) Set a value in the array with dm_array_set_value(). | ||
54 | * | ||
55 | * f) Walk an array of values in index order with dm_array_walk(). More | ||
56 | * efficient than making many calls to dm_array_get_value(). | ||
57 | * | ||
58 | * g) Destroy the array with dm_array_del(). This tells the transaction | ||
59 | * manager that you're no longer using this data structure so it can | ||
60 | * recycle it's blocks. (dm_array_dec() would be a better name for it, | ||
61 | * but del is in keeping with dm_btree_del()). | ||
62 | */ | ||
63 | |||
64 | /* | ||
65 | * Describes an array. Don't initialise this structure yourself, use the | ||
66 | * init function below. | ||
67 | */ | ||
68 | struct dm_array_info { | ||
69 | struct dm_transaction_manager *tm; | ||
70 | struct dm_btree_value_type value_type; | ||
71 | struct dm_btree_info btree_info; | ||
72 | }; | ||
73 | |||
74 | /* | ||
75 | * Sets up a dm_array_info structure. You don't need to do anything with | ||
76 | * this structure when you finish using it. | ||
77 | * | ||
78 | * info - the structure being filled in. | ||
79 | * tm - the transaction manager that should supervise this structure. | ||
80 | * vt - describes the leaf values. | ||
81 | */ | ||
82 | void dm_array_info_init(struct dm_array_info *info, | ||
83 | struct dm_transaction_manager *tm, | ||
84 | struct dm_btree_value_type *vt); | ||
85 | |||
86 | /* | ||
87 | * Create an empty, zero length array. | ||
88 | * | ||
89 | * info - describes the array | ||
90 | * root - on success this will be filled out with the root block | ||
91 | */ | ||
92 | int dm_array_empty(struct dm_array_info *info, dm_block_t *root); | ||
93 | |||
94 | /* | ||
95 | * Resizes the array. | ||
96 | * | ||
97 | * info - describes the array | ||
98 | * root - the root block of the array on disk | ||
99 | * old_size - the caller is responsible for remembering the size of | ||
100 | * the array | ||
101 | * new_size - can be bigger or smaller than old_size | ||
102 | * value - if we're growing the array the new entries will have this value | ||
103 | * new_root - on success, points to the new root block | ||
104 | * | ||
105 | * If growing the inc function for 'value' will be called the appropriate | ||
106 | * number of times. So if the caller is holding a reference they may want | ||
107 | * to drop it. | ||
108 | */ | ||
109 | int dm_array_resize(struct dm_array_info *info, dm_block_t root, | ||
110 | uint32_t old_size, uint32_t new_size, | ||
111 | const void *value, dm_block_t *new_root) | ||
112 | __dm_written_to_disk(value); | ||
113 | |||
114 | /* | ||
115 | * Frees a whole array. The value_type's decrement operation will be called | ||
116 | * for all values in the array | ||
117 | */ | ||
118 | int dm_array_del(struct dm_array_info *info, dm_block_t root); | ||
119 | |||
120 | /* | ||
121 | * Lookup a value in the array | ||
122 | * | ||
123 | * info - describes the array | ||
124 | * root - root block of the array | ||
125 | * index - array index | ||
126 | * value - the value to be read. Will be in on-disk format of course. | ||
127 | * | ||
128 | * -ENODATA will be returned if the index is out of bounds. | ||
129 | */ | ||
130 | int dm_array_get_value(struct dm_array_info *info, dm_block_t root, | ||
131 | uint32_t index, void *value); | ||
132 | |||
133 | /* | ||
134 | * Set an entry in the array. | ||
135 | * | ||
136 | * info - describes the array | ||
137 | * root - root block of the array | ||
138 | * index - array index | ||
139 | * value - value to be written to disk. Make sure you confirm the value is | ||
140 | * in on-disk format with__dm_bless_for_disk() before calling. | ||
141 | * new_root - the new root block | ||
142 | * | ||
143 | * The old value being overwritten will be decremented, the new value | ||
144 | * incremented. | ||
145 | * | ||
146 | * -ENODATA will be returned if the index is out of bounds. | ||
147 | */ | ||
148 | int dm_array_set_value(struct dm_array_info *info, dm_block_t root, | ||
149 | uint32_t index, const void *value, dm_block_t *new_root) | ||
150 | __dm_written_to_disk(value); | ||
151 | |||
152 | /* | ||
153 | * Walk through all the entries in an array. | ||
154 | * | ||
155 | * info - describes the array | ||
156 | * root - root block of the array | ||
157 | * fn - called back for every element | ||
158 | * context - passed to the callback | ||
159 | */ | ||
160 | int dm_array_walk(struct dm_array_info *info, dm_block_t root, | ||
161 | int (*fn)(void *context, uint64_t key, void *leaf), | ||
162 | void *context); | ||
163 | |||
164 | /*----------------------------------------------------------------*/ | ||
165 | |||
166 | #endif /* _LINUX_DM_ARRAY_H */ | ||