diff options
| author | Joe Thornber <ejt@redhat.com> | 2013-03-01 17:45:51 -0500 |
|---|---|---|
| committer | Alasdair G Kergon <agk@redhat.com> | 2013-03-01 17:45:51 -0500 |
| commit | 6513c29f44f2cc970c0e9fecfe5a6526c3e73025 (patch) | |
| tree | 5c501dceffb8a4c5c0a70bde68b084a171fee861 /drivers/md/persistent-data | |
| parent | 025b96853fe0bdc977d88b4242ca5e1f19d9bb66 (diff) | |
dm persistent data: add transactional array
Add a transactional array.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/persistent-data')
| -rw-r--r-- | drivers/md/persistent-data/Makefile | 1 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-array.c | 808 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-array.h | 166 |
3 files changed, 975 insertions, 0 deletions
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index d8e7cb767c1e..ebd8d80d529c 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o | 1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o |
| 2 | dm-persistent-data-objs := \ | 2 | dm-persistent-data-objs := \ |
| 3 | dm-array.o \ | ||
| 3 | dm-block-manager.o \ | 4 | dm-block-manager.o \ |
| 4 | dm-space-map-common.o \ | 5 | dm-space-map-common.o \ |
| 5 | dm-space-map-disk.o \ | 6 | dm-space-map-disk.o \ |
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c new file mode 100644 index 000000000000..172147eb1d40 --- /dev/null +++ b/drivers/md/persistent-data/dm-array.c | |||
| @@ -0,0 +1,808 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2012 Red Hat, Inc. | ||
| 3 | * | ||
| 4 | * This file is released under the GPL. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include "dm-array.h" | ||
| 8 | #include "dm-space-map.h" | ||
| 9 | #include "dm-transaction-manager.h" | ||
| 10 | |||
| 11 | #include <linux/export.h> | ||
| 12 | #include <linux/device-mapper.h> | ||
| 13 | |||
| 14 | #define DM_MSG_PREFIX "array" | ||
| 15 | |||
| 16 | /*----------------------------------------------------------------*/ | ||
| 17 | |||
| 18 | /* | ||
| 19 | * The array is implemented as a fully populated btree, which points to | ||
| 20 | * blocks that contain the packed values. This is more space efficient | ||
| 21 | * than just using a btree since we don't store 1 key per value. | ||
| 22 | */ | ||
| 23 | struct array_block { | ||
| 24 | __le32 csum; | ||
| 25 | __le32 max_entries; | ||
| 26 | __le32 nr_entries; | ||
| 27 | __le32 value_size; | ||
| 28 | __le64 blocknr; /* Block this node is supposed to live in. */ | ||
| 29 | } __packed; | ||
| 30 | |||
| 31 | /*----------------------------------------------------------------*/ | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Validator methods. As usual we calculate a checksum, and also write the | ||
| 35 | * block location into the header (paranoia about ssds remapping areas by | ||
| 36 | * mistake). | ||
| 37 | */ | ||
| 38 | #define CSUM_XOR 595846735 | ||
| 39 | |||
| 40 | static void array_block_prepare_for_write(struct dm_block_validator *v, | ||
| 41 | struct dm_block *b, | ||
| 42 | size_t size_of_block) | ||
| 43 | { | ||
| 44 | struct array_block *bh_le = dm_block_data(b); | ||
| 45 | |||
| 46 | bh_le->blocknr = cpu_to_le64(dm_block_location(b)); | ||
| 47 | bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, | ||
| 48 | size_of_block - sizeof(__le32), | ||
| 49 | CSUM_XOR)); | ||
| 50 | } | ||
| 51 | |||
| 52 | static int array_block_check(struct dm_block_validator *v, | ||
| 53 | struct dm_block *b, | ||
| 54 | size_t size_of_block) | ||
| 55 | { | ||
| 56 | struct array_block *bh_le = dm_block_data(b); | ||
| 57 | __le32 csum_disk; | ||
| 58 | |||
| 59 | if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) { | ||
| 60 | DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu", | ||
| 61 | (unsigned long long) le64_to_cpu(bh_le->blocknr), | ||
| 62 | (unsigned long long) dm_block_location(b)); | ||
| 63 | return -ENOTBLK; | ||
| 64 | } | ||
| 65 | |||
| 66 | csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, | ||
| 67 | size_of_block - sizeof(__le32), | ||
| 68 | CSUM_XOR)); | ||
| 69 | if (csum_disk != bh_le->csum) { | ||
| 70 | DMERR_LIMIT("array_block_check failed: csum %u != wanted %u", | ||
| 71 | (unsigned) le32_to_cpu(csum_disk), | ||
| 72 | (unsigned) le32_to_cpu(bh_le->csum)); | ||
| 73 | return -EILSEQ; | ||
| 74 | } | ||
| 75 | |||
| 76 | return 0; | ||
| 77 | } | ||
| 78 | |||
| 79 | static struct dm_block_validator array_validator = { | ||
| 80 | .name = "array", | ||
| 81 | .prepare_for_write = array_block_prepare_for_write, | ||
| 82 | .check = array_block_check | ||
| 83 | }; | ||
| 84 | |||
| 85 | /*----------------------------------------------------------------*/ | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Functions for manipulating the array blocks. | ||
| 89 | */ | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Returns a pointer to a value within an array block. | ||
| 93 | * | ||
| 94 | * index - The index into _this_ specific block. | ||
| 95 | */ | ||
| 96 | static void *element_at(struct dm_array_info *info, struct array_block *ab, | ||
| 97 | unsigned index) | ||
| 98 | { | ||
| 99 | unsigned char *entry = (unsigned char *) (ab + 1); | ||
| 100 | |||
| 101 | entry += index * info->value_type.size; | ||
| 102 | |||
| 103 | return entry; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Utility function that calls one of the value_type methods on every value | ||
| 108 | * in an array block. | ||
| 109 | */ | ||
| 110 | static void on_entries(struct dm_array_info *info, struct array_block *ab, | ||
| 111 | void (*fn)(void *, const void *)) | ||
| 112 | { | ||
| 113 | unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); | ||
| 114 | |||
| 115 | for (i = 0; i < nr_entries; i++) | ||
| 116 | fn(info->value_type.context, element_at(info, ab, i)); | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * Increment every value in an array block. | ||
| 121 | */ | ||
| 122 | static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab) | ||
| 123 | { | ||
| 124 | struct dm_btree_value_type *vt = &info->value_type; | ||
| 125 | |||
| 126 | if (vt->inc) | ||
| 127 | on_entries(info, ab, vt->inc); | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Decrement every value in an array block. | ||
| 132 | */ | ||
| 133 | static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab) | ||
| 134 | { | ||
| 135 | struct dm_btree_value_type *vt = &info->value_type; | ||
| 136 | |||
| 137 | if (vt->dec) | ||
| 138 | on_entries(info, ab, vt->dec); | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Each array block can hold this many values. | ||
| 143 | */ | ||
| 144 | static uint32_t calc_max_entries(size_t value_size, size_t size_of_block) | ||
| 145 | { | ||
| 146 | return (size_of_block - sizeof(struct array_block)) / value_size; | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * Allocate a new array block. The caller will need to unlock block. | ||
| 151 | */ | ||
| 152 | static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, | ||
| 153 | uint32_t max_entries, | ||
| 154 | struct dm_block **block, struct array_block **ab) | ||
| 155 | { | ||
| 156 | int r; | ||
| 157 | |||
| 158 | r = dm_tm_new_block(info->btree_info.tm, &array_validator, block); | ||
| 159 | if (r) | ||
| 160 | return r; | ||
| 161 | |||
| 162 | (*ab) = dm_block_data(*block); | ||
| 163 | (*ab)->max_entries = cpu_to_le32(max_entries); | ||
| 164 | (*ab)->nr_entries = cpu_to_le32(0); | ||
| 165 | (*ab)->value_size = cpu_to_le32(info->value_type.size); | ||
| 166 | |||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | /* | ||
| 171 | * Pad an array block out with a particular value. Every instance will | ||
| 172 | * cause an increment of the value_type. new_nr must always be more than | ||
| 173 | * the current number of entries. | ||
| 174 | */ | ||
| 175 | static void fill_ablock(struct dm_array_info *info, struct array_block *ab, | ||
| 176 | const void *value, unsigned new_nr) | ||
| 177 | { | ||
| 178 | unsigned i; | ||
| 179 | uint32_t nr_entries; | ||
| 180 | struct dm_btree_value_type *vt = &info->value_type; | ||
| 181 | |||
| 182 | BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); | ||
| 183 | BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); | ||
| 184 | |||
| 185 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
| 186 | for (i = nr_entries; i < new_nr; i++) { | ||
| 187 | if (vt->inc) | ||
| 188 | vt->inc(vt->context, value); | ||
| 189 | memcpy(element_at(info, ab, i), value, vt->size); | ||
| 190 | } | ||
| 191 | ab->nr_entries = cpu_to_le32(new_nr); | ||
| 192 | } | ||
| 193 | |||
| 194 | /* | ||
| 195 | * Remove some entries from the back of an array block. Every value | ||
| 196 | * removed will be decremented. new_nr must be <= the current number of | ||
| 197 | * entries. | ||
| 198 | */ | ||
| 199 | static void trim_ablock(struct dm_array_info *info, struct array_block *ab, | ||
| 200 | unsigned new_nr) | ||
| 201 | { | ||
| 202 | unsigned i; | ||
| 203 | uint32_t nr_entries; | ||
| 204 | struct dm_btree_value_type *vt = &info->value_type; | ||
| 205 | |||
| 206 | BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); | ||
| 207 | BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); | ||
| 208 | |||
| 209 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
| 210 | for (i = nr_entries; i > new_nr; i--) | ||
| 211 | if (vt->dec) | ||
| 212 | vt->dec(vt->context, element_at(info, ab, i - 1)); | ||
| 213 | ab->nr_entries = cpu_to_le32(new_nr); | ||
| 214 | } | ||
| 215 | |||
| 216 | /* | ||
| 217 | * Read locks a block, and coerces it to an array block. The caller must | ||
| 218 | * unlock 'block' when finished. | ||
| 219 | */ | ||
| 220 | static int get_ablock(struct dm_array_info *info, dm_block_t b, | ||
| 221 | struct dm_block **block, struct array_block **ab) | ||
| 222 | { | ||
| 223 | int r; | ||
| 224 | |||
| 225 | r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block); | ||
| 226 | if (r) | ||
| 227 | return r; | ||
| 228 | |||
| 229 | *ab = dm_block_data(*block); | ||
| 230 | return 0; | ||
| 231 | } | ||
| 232 | |||
| 233 | /* | ||
| 234 | * Unlocks an array block. | ||
| 235 | */ | ||
| 236 | static int unlock_ablock(struct dm_array_info *info, struct dm_block *block) | ||
| 237 | { | ||
| 238 | return dm_tm_unlock(info->btree_info.tm, block); | ||
| 239 | } | ||
| 240 | |||
| 241 | /*----------------------------------------------------------------*/ | ||
| 242 | |||
| 243 | /* | ||
| 244 | * Btree manipulation. | ||
| 245 | */ | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Looks up an array block in the btree, and then read locks it. | ||
| 249 | * | ||
| 250 | * index is the index of the index of the array_block, (ie. the array index | ||
| 251 | * / max_entries). | ||
| 252 | */ | ||
| 253 | static int lookup_ablock(struct dm_array_info *info, dm_block_t root, | ||
| 254 | unsigned index, struct dm_block **block, | ||
| 255 | struct array_block **ab) | ||
| 256 | { | ||
| 257 | int r; | ||
| 258 | uint64_t key = index; | ||
| 259 | __le64 block_le; | ||
| 260 | |||
| 261 | r = dm_btree_lookup(&info->btree_info, root, &key, &block_le); | ||
| 262 | if (r) | ||
| 263 | return r; | ||
| 264 | |||
| 265 | return get_ablock(info, le64_to_cpu(block_le), block, ab); | ||
| 266 | } | ||
| 267 | |||
| 268 | /* | ||
| 269 | * Insert an array block into the btree. The block is _not_ unlocked. | ||
| 270 | */ | ||
| 271 | static int insert_ablock(struct dm_array_info *info, uint64_t index, | ||
| 272 | struct dm_block *block, dm_block_t *root) | ||
| 273 | { | ||
| 274 | __le64 block_le = cpu_to_le64(dm_block_location(block)); | ||
| 275 | |||
| 276 | __dm_bless_for_disk(block_le); | ||
| 277 | return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); | ||
| 278 | } | ||
| 279 | |||
| 280 | /* | ||
| 281 | * Looks up an array block in the btree. Then shadows it, and updates the | ||
| 282 | * btree to point to this new shadow. 'root' is an input/output parameter | ||
| 283 | * for both the current root block, and the new one. | ||
| 284 | */ | ||
| 285 | static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, | ||
| 286 | unsigned index, struct dm_block **block, | ||
| 287 | struct array_block **ab) | ||
| 288 | { | ||
| 289 | int r, inc; | ||
| 290 | uint64_t key = index; | ||
| 291 | dm_block_t b; | ||
| 292 | __le64 block_le; | ||
| 293 | |||
| 294 | /* | ||
| 295 | * lookup | ||
| 296 | */ | ||
| 297 | r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); | ||
| 298 | if (r) | ||
| 299 | return r; | ||
| 300 | b = le64_to_cpu(block_le); | ||
| 301 | |||
| 302 | /* | ||
| 303 | * shadow | ||
| 304 | */ | ||
| 305 | r = dm_tm_shadow_block(info->btree_info.tm, b, | ||
| 306 | &array_validator, block, &inc); | ||
| 307 | if (r) | ||
| 308 | return r; | ||
| 309 | |||
| 310 | *ab = dm_block_data(*block); | ||
| 311 | if (inc) | ||
| 312 | inc_ablock_entries(info, *ab); | ||
| 313 | |||
| 314 | /* | ||
| 315 | * Reinsert. | ||
| 316 | * | ||
| 317 | * The shadow op will often be a noop. Only insert if it really | ||
| 318 | * copied data. | ||
| 319 | */ | ||
| 320 | if (dm_block_location(*block) != b) | ||
| 321 | r = insert_ablock(info, index, *block, root); | ||
| 322 | |||
| 323 | return r; | ||
| 324 | } | ||
| 325 | |||
| 326 | /* | ||
| 327 | * Allocate an new array block, and fill it with some values. | ||
| 328 | */ | ||
| 329 | static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block, | ||
| 330 | uint32_t max_entries, | ||
| 331 | unsigned block_index, uint32_t nr, | ||
| 332 | const void *value, dm_block_t *root) | ||
| 333 | { | ||
| 334 | int r; | ||
| 335 | struct dm_block *block; | ||
| 336 | struct array_block *ab; | ||
| 337 | |||
| 338 | r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); | ||
| 339 | if (r) | ||
| 340 | return r; | ||
| 341 | |||
| 342 | fill_ablock(info, ab, value, nr); | ||
| 343 | r = insert_ablock(info, block_index, block, root); | ||
| 344 | unlock_ablock(info, block); | ||
| 345 | |||
| 346 | return r; | ||
| 347 | } | ||
| 348 | |||
| 349 | static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block, | ||
| 350 | unsigned begin_block, unsigned end_block, | ||
| 351 | unsigned max_entries, const void *value, | ||
| 352 | dm_block_t *root) | ||
| 353 | { | ||
| 354 | int r = 0; | ||
| 355 | |||
| 356 | for (; !r && begin_block != end_block; begin_block++) | ||
| 357 | r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root); | ||
| 358 | |||
| 359 | return r; | ||
| 360 | } | ||
| 361 | |||
| 362 | /* | ||
| 363 | * There are a bunch of functions involved with resizing an array. This | ||
| 364 | * structure holds information that commonly needed by them. Purely here | ||
| 365 | * to reduce parameter count. | ||
| 366 | */ | ||
| 367 | struct resize { | ||
| 368 | /* | ||
| 369 | * Describes the array. | ||
| 370 | */ | ||
| 371 | struct dm_array_info *info; | ||
| 372 | |||
| 373 | /* | ||
| 374 | * The current root of the array. This gets updated. | ||
| 375 | */ | ||
| 376 | dm_block_t root; | ||
| 377 | |||
| 378 | /* | ||
| 379 | * Metadata block size. Used to calculate the nr entries in an | ||
| 380 | * array block. | ||
| 381 | */ | ||
| 382 | size_t size_of_block; | ||
| 383 | |||
| 384 | /* | ||
| 385 | * Maximum nr entries in an array block. | ||
| 386 | */ | ||
| 387 | unsigned max_entries; | ||
| 388 | |||
| 389 | /* | ||
| 390 | * nr of completely full blocks in the array. | ||
| 391 | * | ||
| 392 | * 'old' refers to before the resize, 'new' after. | ||
| 393 | */ | ||
| 394 | unsigned old_nr_full_blocks, new_nr_full_blocks; | ||
| 395 | |||
| 396 | /* | ||
| 397 | * Number of entries in the final block. 0 iff only full blocks in | ||
| 398 | * the array. | ||
| 399 | */ | ||
| 400 | unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block; | ||
| 401 | |||
| 402 | /* | ||
| 403 | * The default value used when growing the array. | ||
| 404 | */ | ||
| 405 | const void *value; | ||
| 406 | }; | ||
| 407 | |||
| 408 | /* | ||
| 409 | * Removes a consecutive set of array blocks from the btree. The values | ||
| 410 | * in block are decremented as a side effect of the btree remove. | ||
| 411 | * | ||
| 412 | * begin_index - the index of the first array block to remove. | ||
| 413 | * end_index - the one-past-the-end value. ie. this block is not removed. | ||
| 414 | */ | ||
| 415 | static int drop_blocks(struct resize *resize, unsigned begin_index, | ||
| 416 | unsigned end_index) | ||
| 417 | { | ||
| 418 | int r; | ||
| 419 | |||
| 420 | while (begin_index != end_index) { | ||
| 421 | uint64_t key = begin_index++; | ||
| 422 | r = dm_btree_remove(&resize->info->btree_info, resize->root, | ||
| 423 | &key, &resize->root); | ||
| 424 | if (r) | ||
| 425 | return r; | ||
| 426 | } | ||
| 427 | |||
| 428 | return 0; | ||
| 429 | } | ||
| 430 | |||
| 431 | /* | ||
| 432 | * Calculates how many blocks are needed for the array. | ||
| 433 | */ | ||
| 434 | static unsigned total_nr_blocks_needed(unsigned nr_full_blocks, | ||
| 435 | unsigned nr_entries_in_last_block) | ||
| 436 | { | ||
| 437 | return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0); | ||
| 438 | } | ||
| 439 | |||
| 440 | /* | ||
| 441 | * Shrink an array. | ||
| 442 | */ | ||
| 443 | static int shrink(struct resize *resize) | ||
| 444 | { | ||
| 445 | int r; | ||
| 446 | unsigned begin, end; | ||
| 447 | struct dm_block *block; | ||
| 448 | struct array_block *ab; | ||
| 449 | |||
| 450 | /* | ||
| 451 | * Lose some blocks from the back? | ||
| 452 | */ | ||
| 453 | if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) { | ||
| 454 | begin = total_nr_blocks_needed(resize->new_nr_full_blocks, | ||
| 455 | resize->new_nr_entries_in_last_block); | ||
| 456 | end = total_nr_blocks_needed(resize->old_nr_full_blocks, | ||
| 457 | resize->old_nr_entries_in_last_block); | ||
| 458 | |||
| 459 | r = drop_blocks(resize, begin, end); | ||
| 460 | if (r) | ||
| 461 | return r; | ||
| 462 | } | ||
| 463 | |||
| 464 | /* | ||
| 465 | * Trim the new tail block | ||
| 466 | */ | ||
| 467 | if (resize->new_nr_entries_in_last_block) { | ||
| 468 | r = shadow_ablock(resize->info, &resize->root, | ||
| 469 | resize->new_nr_full_blocks, &block, &ab); | ||
| 470 | if (r) | ||
| 471 | return r; | ||
| 472 | |||
| 473 | trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block); | ||
| 474 | unlock_ablock(resize->info, block); | ||
| 475 | } | ||
| 476 | |||
| 477 | return 0; | ||
| 478 | } | ||
| 479 | |||
| 480 | /* | ||
| 481 | * Grow an array. | ||
| 482 | */ | ||
| 483 | static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries) | ||
| 484 | { | ||
| 485 | int r; | ||
| 486 | struct dm_block *block; | ||
| 487 | struct array_block *ab; | ||
| 488 | |||
| 489 | r = shadow_ablock(resize->info, &resize->root, | ||
| 490 | resize->old_nr_full_blocks, &block, &ab); | ||
| 491 | if (r) | ||
| 492 | return r; | ||
| 493 | |||
| 494 | fill_ablock(resize->info, ab, resize->value, new_nr_entries); | ||
| 495 | unlock_ablock(resize->info, block); | ||
| 496 | |||
| 497 | return r; | ||
| 498 | } | ||
| 499 | |||
| 500 | static int grow_add_tail_block(struct resize *resize) | ||
| 501 | { | ||
| 502 | return insert_new_ablock(resize->info, resize->size_of_block, | ||
| 503 | resize->max_entries, | ||
| 504 | resize->new_nr_full_blocks, | ||
| 505 | resize->new_nr_entries_in_last_block, | ||
| 506 | resize->value, &resize->root); | ||
| 507 | } | ||
| 508 | |||
| 509 | static int grow_needs_more_blocks(struct resize *resize) | ||
| 510 | { | ||
| 511 | int r; | ||
| 512 | |||
| 513 | if (resize->old_nr_entries_in_last_block > 0) { | ||
| 514 | r = grow_extend_tail_block(resize, resize->max_entries); | ||
| 515 | if (r) | ||
| 516 | return r; | ||
| 517 | } | ||
| 518 | |||
| 519 | r = insert_full_ablocks(resize->info, resize->size_of_block, | ||
| 520 | resize->old_nr_full_blocks, | ||
| 521 | resize->new_nr_full_blocks, | ||
| 522 | resize->max_entries, resize->value, | ||
| 523 | &resize->root); | ||
| 524 | if (r) | ||
| 525 | return r; | ||
| 526 | |||
| 527 | if (resize->new_nr_entries_in_last_block) | ||
| 528 | r = grow_add_tail_block(resize); | ||
| 529 | |||
| 530 | return r; | ||
| 531 | } | ||
| 532 | |||
| 533 | static int grow(struct resize *resize) | ||
| 534 | { | ||
| 535 | if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) | ||
| 536 | return grow_needs_more_blocks(resize); | ||
| 537 | |||
| 538 | else if (resize->old_nr_entries_in_last_block) | ||
| 539 | return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block); | ||
| 540 | |||
| 541 | else | ||
| 542 | return grow_add_tail_block(resize); | ||
| 543 | } | ||
| 544 | |||
| 545 | /*----------------------------------------------------------------*/ | ||
| 546 | |||
| 547 | /* | ||
| 548 | * These are the value_type functions for the btree elements, which point | ||
| 549 | * to array blocks. | ||
| 550 | */ | ||
| 551 | static void block_inc(void *context, const void *value) | ||
| 552 | { | ||
| 553 | __le64 block_le; | ||
| 554 | struct dm_array_info *info = context; | ||
| 555 | |||
| 556 | memcpy(&block_le, value, sizeof(block_le)); | ||
| 557 | dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); | ||
| 558 | } | ||
| 559 | |||
| 560 | static void block_dec(void *context, const void *value) | ||
| 561 | { | ||
| 562 | int r; | ||
| 563 | uint64_t b; | ||
| 564 | __le64 block_le; | ||
| 565 | uint32_t ref_count; | ||
| 566 | struct dm_block *block; | ||
| 567 | struct array_block *ab; | ||
| 568 | struct dm_array_info *info = context; | ||
| 569 | |||
| 570 | memcpy(&block_le, value, sizeof(block_le)); | ||
| 571 | b = le64_to_cpu(block_le); | ||
| 572 | |||
| 573 | r = dm_tm_ref(info->btree_info.tm, b, &ref_count); | ||
| 574 | if (r) { | ||
| 575 | DMERR_LIMIT("couldn't get reference count for block %llu", | ||
| 576 | (unsigned long long) b); | ||
| 577 | return; | ||
| 578 | } | ||
| 579 | |||
| 580 | if (ref_count == 1) { | ||
| 581 | /* | ||
| 582 | * We're about to drop the last reference to this ablock. | ||
| 583 | * So we need to decrement the ref count of the contents. | ||
| 584 | */ | ||
| 585 | r = get_ablock(info, b, &block, &ab); | ||
| 586 | if (r) { | ||
| 587 | DMERR_LIMIT("couldn't get array block %llu", | ||
| 588 | (unsigned long long) b); | ||
| 589 | return; | ||
| 590 | } | ||
| 591 | |||
| 592 | dec_ablock_entries(info, ab); | ||
| 593 | unlock_ablock(info, block); | ||
| 594 | } | ||
| 595 | |||
| 596 | dm_tm_dec(info->btree_info.tm, b); | ||
| 597 | } | ||
| 598 | |||
| 599 | static int block_equal(void *context, const void *value1, const void *value2) | ||
| 600 | { | ||
| 601 | return !memcmp(value1, value2, sizeof(__le64)); | ||
| 602 | } | ||
| 603 | |||
| 604 | /*----------------------------------------------------------------*/ | ||
| 605 | |||
| 606 | void dm_array_info_init(struct dm_array_info *info, | ||
| 607 | struct dm_transaction_manager *tm, | ||
| 608 | struct dm_btree_value_type *vt) | ||
| 609 | { | ||
| 610 | struct dm_btree_value_type *bvt = &info->btree_info.value_type; | ||
| 611 | |||
| 612 | memcpy(&info->value_type, vt, sizeof(info->value_type)); | ||
| 613 | info->btree_info.tm = tm; | ||
| 614 | info->btree_info.levels = 1; | ||
| 615 | |||
| 616 | bvt->context = info; | ||
| 617 | bvt->size = sizeof(__le64); | ||
| 618 | bvt->inc = block_inc; | ||
| 619 | bvt->dec = block_dec; | ||
| 620 | bvt->equal = block_equal; | ||
| 621 | } | ||
| 622 | EXPORT_SYMBOL_GPL(dm_array_info_init); | ||
| 623 | |||
| 624 | int dm_array_empty(struct dm_array_info *info, dm_block_t *root) | ||
| 625 | { | ||
| 626 | return dm_btree_empty(&info->btree_info, root); | ||
| 627 | } | ||
| 628 | EXPORT_SYMBOL_GPL(dm_array_empty); | ||
| 629 | |||
| 630 | static int array_resize(struct dm_array_info *info, dm_block_t root, | ||
| 631 | uint32_t old_size, uint32_t new_size, | ||
| 632 | const void *value, dm_block_t *new_root) | ||
| 633 | { | ||
| 634 | int r; | ||
| 635 | struct resize resize; | ||
| 636 | |||
| 637 | if (old_size == new_size) | ||
| 638 | return 0; | ||
| 639 | |||
| 640 | resize.info = info; | ||
| 641 | resize.root = root; | ||
| 642 | resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
| 643 | resize.max_entries = calc_max_entries(info->value_type.size, | ||
| 644 | resize.size_of_block); | ||
| 645 | |||
| 646 | resize.old_nr_full_blocks = old_size / resize.max_entries; | ||
| 647 | resize.old_nr_entries_in_last_block = old_size % resize.max_entries; | ||
| 648 | resize.new_nr_full_blocks = new_size / resize.max_entries; | ||
| 649 | resize.new_nr_entries_in_last_block = new_size % resize.max_entries; | ||
| 650 | resize.value = value; | ||
| 651 | |||
| 652 | r = ((new_size > old_size) ? grow : shrink)(&resize); | ||
| 653 | if (r) | ||
| 654 | return r; | ||
| 655 | |||
| 656 | *new_root = resize.root; | ||
| 657 | return 0; | ||
| 658 | } | ||
| 659 | |||
| 660 | int dm_array_resize(struct dm_array_info *info, dm_block_t root, | ||
| 661 | uint32_t old_size, uint32_t new_size, | ||
| 662 | const void *value, dm_block_t *new_root) | ||
| 663 | __dm_written_to_disk(value) | ||
| 664 | { | ||
| 665 | int r = array_resize(info, root, old_size, new_size, value, new_root); | ||
| 666 | __dm_unbless_for_disk(value); | ||
| 667 | return r; | ||
| 668 | } | ||
| 669 | EXPORT_SYMBOL_GPL(dm_array_resize); | ||
| 670 | |||
| 671 | int dm_array_del(struct dm_array_info *info, dm_block_t root) | ||
| 672 | { | ||
| 673 | return dm_btree_del(&info->btree_info, root); | ||
| 674 | } | ||
| 675 | EXPORT_SYMBOL_GPL(dm_array_del); | ||
| 676 | |||
| 677 | int dm_array_get_value(struct dm_array_info *info, dm_block_t root, | ||
| 678 | uint32_t index, void *value_le) | ||
| 679 | { | ||
| 680 | int r; | ||
| 681 | struct dm_block *block; | ||
| 682 | struct array_block *ab; | ||
| 683 | size_t size_of_block; | ||
| 684 | unsigned entry, max_entries; | ||
| 685 | |||
| 686 | size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
| 687 | max_entries = calc_max_entries(info->value_type.size, size_of_block); | ||
| 688 | |||
| 689 | r = lookup_ablock(info, root, index / max_entries, &block, &ab); | ||
| 690 | if (r) | ||
| 691 | return r; | ||
| 692 | |||
| 693 | entry = index % max_entries; | ||
| 694 | if (entry >= le32_to_cpu(ab->nr_entries)) | ||
| 695 | r = -ENODATA; | ||
| 696 | else | ||
| 697 | memcpy(value_le, element_at(info, ab, entry), | ||
| 698 | info->value_type.size); | ||
| 699 | |||
| 700 | unlock_ablock(info, block); | ||
| 701 | return r; | ||
| 702 | } | ||
| 703 | EXPORT_SYMBOL_GPL(dm_array_get_value); | ||
| 704 | |||
| 705 | static int array_set_value(struct dm_array_info *info, dm_block_t root, | ||
| 706 | uint32_t index, const void *value, dm_block_t *new_root) | ||
| 707 | { | ||
| 708 | int r; | ||
| 709 | struct dm_block *block; | ||
| 710 | struct array_block *ab; | ||
| 711 | size_t size_of_block; | ||
| 712 | unsigned max_entries; | ||
| 713 | unsigned entry; | ||
| 714 | void *old_value; | ||
| 715 | struct dm_btree_value_type *vt = &info->value_type; | ||
| 716 | |||
| 717 | size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||
| 718 | max_entries = calc_max_entries(info->value_type.size, size_of_block); | ||
| 719 | |||
| 720 | r = shadow_ablock(info, &root, index / max_entries, &block, &ab); | ||
| 721 | if (r) | ||
| 722 | return r; | ||
| 723 | *new_root = root; | ||
| 724 | |||
| 725 | entry = index % max_entries; | ||
| 726 | if (entry >= le32_to_cpu(ab->nr_entries)) { | ||
| 727 | r = -ENODATA; | ||
| 728 | goto out; | ||
| 729 | } | ||
| 730 | |||
| 731 | old_value = element_at(info, ab, entry); | ||
| 732 | if (vt->dec && | ||
| 733 | (!vt->equal || !vt->equal(vt->context, old_value, value))) { | ||
| 734 | vt->dec(vt->context, old_value); | ||
| 735 | if (vt->inc) | ||
| 736 | vt->inc(vt->context, value); | ||
| 737 | } | ||
| 738 | |||
| 739 | memcpy(old_value, value, info->value_type.size); | ||
| 740 | |||
| 741 | out: | ||
| 742 | unlock_ablock(info, block); | ||
| 743 | return r; | ||
| 744 | } | ||
| 745 | |||
| 746 | int dm_array_set_value(struct dm_array_info *info, dm_block_t root, | ||
| 747 | uint32_t index, const void *value, dm_block_t *new_root) | ||
| 748 | __dm_written_to_disk(value) | ||
| 749 | { | ||
| 750 | int r; | ||
| 751 | |||
| 752 | r = array_set_value(info, root, index, value, new_root); | ||
| 753 | __dm_unbless_for_disk(value); | ||
| 754 | return r; | ||
| 755 | } | ||
| 756 | EXPORT_SYMBOL_GPL(dm_array_set_value); | ||
| 757 | |||
| 758 | struct walk_info { | ||
| 759 | struct dm_array_info *info; | ||
| 760 | int (*fn)(void *context, uint64_t key, void *leaf); | ||
| 761 | void *context; | ||
| 762 | }; | ||
| 763 | |||
| 764 | static int walk_ablock(void *context, uint64_t *keys, void *leaf) | ||
| 765 | { | ||
| 766 | struct walk_info *wi = context; | ||
| 767 | |||
| 768 | int r; | ||
| 769 | unsigned i; | ||
| 770 | __le64 block_le; | ||
| 771 | unsigned nr_entries, max_entries; | ||
| 772 | struct dm_block *block; | ||
| 773 | struct array_block *ab; | ||
| 774 | |||
| 775 | memcpy(&block_le, leaf, sizeof(block_le)); | ||
| 776 | r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab); | ||
| 777 | if (r) | ||
| 778 | return r; | ||
| 779 | |||
| 780 | max_entries = le32_to_cpu(ab->max_entries); | ||
| 781 | nr_entries = le32_to_cpu(ab->nr_entries); | ||
| 782 | for (i = 0; i < nr_entries; i++) { | ||
| 783 | r = wi->fn(wi->context, keys[0] * max_entries + i, | ||
| 784 | element_at(wi->info, ab, i)); | ||
| 785 | |||
| 786 | if (r) | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | |||
| 790 | unlock_ablock(wi->info, block); | ||
| 791 | return r; | ||
| 792 | } | ||
| 793 | |||
| 794 | int dm_array_walk(struct dm_array_info *info, dm_block_t root, | ||
| 795 | int (*fn)(void *, uint64_t key, void *leaf), | ||
| 796 | void *context) | ||
| 797 | { | ||
| 798 | struct walk_info wi; | ||
| 799 | |||
| 800 | wi.info = info; | ||
| 801 | wi.fn = fn; | ||
| 802 | wi.context = context; | ||
| 803 | |||
| 804 | return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi); | ||
| 805 | } | ||
| 806 | EXPORT_SYMBOL_GPL(dm_array_walk); | ||
| 807 | |||
| 808 | /*----------------------------------------------------------------*/ | ||
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h new file mode 100644 index 000000000000..ea177d6fa58f --- /dev/null +++ b/drivers/md/persistent-data/dm-array.h | |||
| @@ -0,0 +1,166 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2012 Red Hat, Inc. | ||
| 3 | * | ||
| 4 | * This file is released under the GPL. | ||
| 5 | */ | ||
| 6 | #ifndef _LINUX_DM_ARRAY_H | ||
| 7 | #define _LINUX_DM_ARRAY_H | ||
| 8 | |||
| 9 | #include "dm-btree.h" | ||
| 10 | |||
| 11 | /*----------------------------------------------------------------*/ | ||
| 12 | |||
| 13 | /* | ||
| 14 | * The dm-array is a persistent version of an array. It packs the data | ||
| 15 | * more efficiently than a btree which will result in less disk space use, | ||
| 16 | * and a performance boost. The element get and set operations are still | ||
| 17 | * O(ln(n)), but with a much smaller constant. | ||
| 18 | * | ||
| 19 | * The value type structure is reused from the btree type to support proper | ||
| 20 | * reference counting of values. | ||
| 21 | * | ||
| 22 | * The arrays implicitly know their length, and bounds are checked for | ||
| 23 | * lookups and updated. It doesn't store this in an accessible place | ||
| 24 | * because it would waste a whole metadata block. Make sure you store the | ||
| 25 | * size along with the array root in your encompassing data. | ||
| 26 | * | ||
| 27 | * Array entries are indexed via an unsigned integer starting from zero. | ||
| 28 | * Arrays are not sparse; if you resize an array to have 'n' entries then | ||
| 29 | * 'n - 1' will be the last valid index. | ||
| 30 | * | ||
| 31 | * Typical use: | ||
| 32 | * | ||
| 33 | * a) initialise a dm_array_info structure. This describes the array | ||
| 34 | * values and ties it into a specific transaction manager. It holds no | ||
| 35 | * instance data; the same info can be used for many similar arrays if | ||
| 36 | * you wish. | ||
| 37 | * | ||
| 38 | * b) Get yourself a root. The root is the index of a block of data on the | ||
| 39 | * disk that holds a particular instance of an array. You may have a | ||
| 40 | * pre existing root in your metadata that you wish to use, or you may | ||
| 41 | * want to create a brand new, empty array with dm_array_empty(). | ||
| 42 | * | ||
| 43 | * Like the other data structures in this library, dm_array objects are | ||
| 44 | * immutable between transactions. Update functions will return you the | ||
| 45 | * root for a _new_ array. If you've incremented the old root, via | ||
| 46 | * dm_tm_inc(), before calling the update function you may continue to use | ||
| 47 | * it in parallel with the new root. | ||
| 48 | * | ||
| 49 | * c) resize an array with dm_array_resize(). | ||
| 50 | * | ||
| 51 | * d) Get a value from the array with dm_array_get_value(). | ||
| 52 | * | ||
| 53 | * e) Set a value in the array with dm_array_set_value(). | ||
| 54 | * | ||
| 55 | * f) Walk an array of values in index order with dm_array_walk(). More | ||
| 56 | * efficient than making many calls to dm_array_get_value(). | ||
| 57 | * | ||
| 58 | * g) Destroy the array with dm_array_del(). This tells the transaction | ||
| 59 | * manager that you're no longer using this data structure so it can | ||
| 60 | * recycle it's blocks. (dm_array_dec() would be a better name for it, | ||
| 61 | * but del is in keeping with dm_btree_del()). | ||
| 62 | */ | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Describes an array. Don't initialise this structure yourself, use the | ||
| 66 | * init function below. | ||
| 67 | */ | ||
| 68 | struct dm_array_info { | ||
| 69 | struct dm_transaction_manager *tm; | ||
| 70 | struct dm_btree_value_type value_type; | ||
| 71 | struct dm_btree_info btree_info; | ||
| 72 | }; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Sets up a dm_array_info structure. You don't need to do anything with | ||
| 76 | * this structure when you finish using it. | ||
| 77 | * | ||
| 78 | * info - the structure being filled in. | ||
| 79 | * tm - the transaction manager that should supervise this structure. | ||
| 80 | * vt - describes the leaf values. | ||
| 81 | */ | ||
| 82 | void dm_array_info_init(struct dm_array_info *info, | ||
| 83 | struct dm_transaction_manager *tm, | ||
| 84 | struct dm_btree_value_type *vt); | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Create an empty, zero length array. | ||
| 88 | * | ||
| 89 | * info - describes the array | ||
| 90 | * root - on success this will be filled out with the root block | ||
| 91 | */ | ||
| 92 | int dm_array_empty(struct dm_array_info *info, dm_block_t *root); | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Resizes the array. | ||
| 96 | * | ||
| 97 | * info - describes the array | ||
| 98 | * root - the root block of the array on disk | ||
| 99 | * old_size - the caller is responsible for remembering the size of | ||
| 100 | * the array | ||
| 101 | * new_size - can be bigger or smaller than old_size | ||
| 102 | * value - if we're growing the array the new entries will have this value | ||
| 103 | * new_root - on success, points to the new root block | ||
| 104 | * | ||
| 105 | * If growing the inc function for 'value' will be called the appropriate | ||
| 106 | * number of times. So if the caller is holding a reference they may want | ||
| 107 | * to drop it. | ||
| 108 | */ | ||
| 109 | int dm_array_resize(struct dm_array_info *info, dm_block_t root, | ||
| 110 | uint32_t old_size, uint32_t new_size, | ||
| 111 | const void *value, dm_block_t *new_root) | ||
| 112 | __dm_written_to_disk(value); | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Frees a whole array. The value_type's decrement operation will be called | ||
| 116 | * for all values in the array | ||
| 117 | */ | ||
| 118 | int dm_array_del(struct dm_array_info *info, dm_block_t root); | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Lookup a value in the array | ||
| 122 | * | ||
| 123 | * info - describes the array | ||
| 124 | * root - root block of the array | ||
| 125 | * index - array index | ||
| 126 | * value - the value to be read. Will be in on-disk format of course. | ||
| 127 | * | ||
| 128 | * -ENODATA will be returned if the index is out of bounds. | ||
| 129 | */ | ||
| 130 | int dm_array_get_value(struct dm_array_info *info, dm_block_t root, | ||
| 131 | uint32_t index, void *value); | ||
| 132 | |||
| 133 | /* | ||
| 134 | * Set an entry in the array. | ||
| 135 | * | ||
| 136 | * info - describes the array | ||
| 137 | * root - root block of the array | ||
| 138 | * index - array index | ||
| 139 | * value - value to be written to disk. Make sure you confirm the value is | ||
| 140 | * in on-disk format with__dm_bless_for_disk() before calling. | ||
| 141 | * new_root - the new root block | ||
| 142 | * | ||
| 143 | * The old value being overwritten will be decremented, the new value | ||
| 144 | * incremented. | ||
| 145 | * | ||
| 146 | * -ENODATA will be returned if the index is out of bounds. | ||
| 147 | */ | ||
| 148 | int dm_array_set_value(struct dm_array_info *info, dm_block_t root, | ||
| 149 | uint32_t index, const void *value, dm_block_t *new_root) | ||
| 150 | __dm_written_to_disk(value); | ||
| 151 | |||
| 152 | /* | ||
| 153 | * Walk through all the entries in an array. | ||
| 154 | * | ||
| 155 | * info - describes the array | ||
| 156 | * root - root block of the array | ||
| 157 | * fn - called back for every element | ||
| 158 | * context - passed to the callback | ||
| 159 | */ | ||
| 160 | int dm_array_walk(struct dm_array_info *info, dm_block_t root, | ||
| 161 | int (*fn)(void *context, uint64_t key, void *leaf), | ||
| 162 | void *context); | ||
| 163 | |||
| 164 | /*----------------------------------------------------------------*/ | ||
| 165 | |||
| 166 | #endif /* _LINUX_DM_ARRAY_H */ | ||
