diff options
Diffstat (limited to 'drivers/md/dm-thin-metadata.c')
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 1391 |
1 files changed, 1391 insertions, 0 deletions
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c new file mode 100644 index 00000000000..59c4f0446ff --- /dev/null +++ b/drivers/md/dm-thin-metadata.c | |||
@@ -0,0 +1,1391 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include "dm-thin-metadata.h" | ||
8 | #include "persistent-data/dm-btree.h" | ||
9 | #include "persistent-data/dm-space-map.h" | ||
10 | #include "persistent-data/dm-space-map-disk.h" | ||
11 | #include "persistent-data/dm-transaction-manager.h" | ||
12 | |||
13 | #include <linux/list.h> | ||
14 | #include <linux/device-mapper.h> | ||
15 | #include <linux/workqueue.h> | ||
16 | |||
17 | /*-------------------------------------------------------------------------- | ||
18 | * As far as the metadata goes, there is: | ||
19 | * | ||
20 | * - A superblock in block zero, taking up fewer than 512 bytes for | ||
21 | * atomic writes. | ||
22 | * | ||
23 | * - A space map managing the metadata blocks. | ||
24 | * | ||
25 | * - A space map managing the data blocks. | ||
26 | * | ||
27 | * - A btree mapping our internal thin dev ids onto struct disk_device_details. | ||
28 | * | ||
29 | * - A hierarchical btree, with 2 levels which effectively maps (thin | ||
30 | * dev id, virtual block) -> block_time. Block time is a 64-bit | ||
31 | * field holding the time in the low 24 bits, and block in the top 40 | ||
32 | * bits. | ||
33 | * | ||
34 | * BTrees consist solely of btree_nodes, that fill a block. Some are | ||
35 | * internal nodes, as such their values are a __le64 pointing to other | ||
36 | * nodes. Leaf nodes can store data of any reasonable size (ie. much | ||
37 | * smaller than the block size). The nodes consist of the header, | ||
38 | * followed by an array of keys, followed by an array of values. We have | ||
39 | * to binary search on the keys so they're all held together to help the | ||
40 | * cpu cache. | ||
41 | * | ||
42 | * Space maps have 2 btrees: | ||
43 | * | ||
44 | * - One maps a uint64_t onto a struct index_entry. Which points to a | ||
45 | * bitmap block, and has some details about how many free entries there | ||
46 | * are etc. | ||
47 | * | ||
48 | * - The bitmap blocks have a header (for the checksum). Then the rest | ||
49 | * of the block is pairs of bits. With the meaning being: | ||
50 | * | ||
51 | * 0 - ref count is 0 | ||
52 | * 1 - ref count is 1 | ||
53 | * 2 - ref count is 2 | ||
54 | * 3 - ref count is higher than 2 | ||
55 | * | ||
56 | * - If the count is higher than 2 then the ref count is entered in a | ||
57 | * second btree that directly maps the block_address to a uint32_t ref | ||
58 | * count. | ||
59 | * | ||
60 | * The space map metadata variant doesn't have a bitmaps btree. Instead | ||
61 | * it has one single block's worth of index_entries. This avoids | ||
62 | * recursive issues with the bitmap btree needing to allocate space in | ||
63 | * order to insert. With a small data block size such as 64k the | ||
64 | * metadata support data devices that are hundreds of terabytes. | ||
65 | * | ||
66 | * The space maps allocate space linearly from front to back. Space that | ||
67 | * is freed in a transaction is never recycled within that transaction. | ||
68 | * To try and avoid fragmenting _free_ space the allocator always goes | ||
69 | * back and fills in gaps. | ||
70 | * | ||
71 | * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks | ||
72 | * from the block manager. | ||
73 | *--------------------------------------------------------------------------*/ | ||
74 | |||
/* Prefix used by the DMERR()/DMWARN() messages emitted from this file. */
#define DM_MSG_PREFIX "thin metadata"

/* Value sb_check() expects to find in the superblock's magic field. */
#define THIN_SUPERBLOCK_MAGIC 27022010
/* Metadata block number at which the superblock is read and written. */
#define THIN_SUPERBLOCK_LOCATION 0
/* Written to the superblock's version field when metadata is created. */
#define THIN_VERSION 1
/* Cache size argument handed to dm_block_manager_create(). */
#define THIN_METADATA_CACHE_SIZE 64
/* Shift applied to a size in 512-byte sectors to get metadata_nr_blocks. */
#define SECTOR_TO_BLOCK_SHIFT 3

/* This should be plenty */
/* Byte size of each space map root area embedded in the superblock. */
#define SPACE_MAP_ROOT_SIZE 128
85 | |||
/*
 * Little endian on-disk superblock and device details.
 *
 * This structure lives at THIN_SUPERBLOCK_LOCATION.  __commit_transaction()
 * asserts at build time that it does not exceed 512 bytes.  The field order
 * defines the on-disk format and must not be changed.
 */
struct thin_disk_superblock {
	__le32 csum; /* Checksum of superblock except for this field. */
	__le32 flags; /* First field covered by the checksum. */
	__le64 blocknr; /* This block number, dm_block_t. */

	__u8 uuid[16];
	__le64 magic; /* Must equal THIN_SUPERBLOCK_MAGIC (see sb_check()). */
	__le32 version; /* Set to THIN_VERSION when the metadata is created. */
	__le32 time; /* Loaded into / stored from pmd->time. */

	__le64 trans_id; /* Loaded into / stored from pmd->trans_id. */

	/*
	 * Root held by userspace transactions.
	 */
	__le64 held_root;

	__u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * 2-level btree mapping (dev_id, (dev block, time)) -> data block
	 */
	__le64 data_mapping_root;

	/*
	 * Device detail root mapping dev_id -> device_details
	 */
	__le64 device_details_root;

	__le32 data_block_size; /* In 512-byte sectors. */

	__le32 metadata_block_size; /* In 512-byte sectors. */
	__le64 metadata_nr_blocks;

	/*
	 * Feature flags.  __begin_transaction() refuses metadata with unknown
	 * incompat_flags bits, and refuses unknown compat_ro_flags bits unless
	 * the metadata disk is read-only.
	 */
	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;
} __packed;
128 | |||
/*
 * On-disk (little endian) value type of the device details btree, keyed by
 * thin device id.  It is the serialised form of the corresponding fields in
 * struct dm_thin_device (see __write_changed_details() and __open_device()).
 */
struct disk_device_details {
	__le64 mapped_blocks;
	__le64 transaction_id; /* When created. */
	__le32 creation_time;
	__le32 snapshotted_time;
} __packed;
135 | |||
/*
 * In-core state for one pool's metadata device.  Allocated by
 * dm_pool_metadata_open(), populated by init_pmd() and
 * __begin_transaction(), and freed by dm_pool_metadata_close().
 */
struct dm_pool_metadata {
	struct hlist_node hash; /* NOTE(review): not used in the code visible here. */

	struct block_device *bdev; /* Metadata device passed to dm_pool_metadata_open(). */
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
	struct dm_space_map *data_sm;
	struct dm_transaction_manager *tm;
	struct dm_transaction_manager *nb_tm; /* Non-blocking clone of tm. */

	/*
	 * Two-level btree.
	 * First level holds thin_dev_t.
	 * Second level holds mappings.
	 */
	struct dm_btree_info info;

	/*
	 * Non-blocking version of the above.
	 */
	struct dm_btree_info nb_info;

	/*
	 * Just the top level for deleting whole devices.
	 */
	struct dm_btree_info tl_info;

	/*
	 * Just the bottom level for creating new devices.
	 */
	struct dm_btree_info bl_info;

	/*
	 * Describes the device details btree.
	 */
	struct dm_btree_info details_info;

	struct rw_semaphore root_lock; /* Taken by the dm_pool_*() entry points. */
	uint32_t time; /* Mirrors the superblock time; bumped by __create_snap(). */
	int need_commit; /* Non-zero when there are changes to commit. */
	dm_block_t root; /* Root of the data mapping btree. */
	dm_block_t details_root; /* Root of the device details btree. */
	struct list_head thin_devices; /* List of struct dm_thin_device. */
	uint64_t trans_id; /* Mirrors the superblock trans_id. */
	unsigned long flags; /* Mirrors the superblock flags. */
	sector_t data_block_size; /* Read from the superblock's data_block_size. */
};
183 | |||
/*
 * In-core record for one thin device, linked on pmd->thin_devices.  Created
 * by __open_device(); freed once it has no openers, either after its details
 * are written back or when the pool metadata is closed.
 */
struct dm_thin_device {
	struct list_head list; /* Entry in pmd->thin_devices. */
	struct dm_pool_metadata *pmd; /* Owning pool metadata. */
	dm_thin_id id; /* Key into the device details btree. */

	int open_count; /* Incremented by __open_device(), decremented by __close_device(). */
	int changed; /* Non-zero when the fields below must be written back to disk. */
	uint64_t mapped_blocks;
	uint64_t transaction_id;
	uint32_t creation_time;
	uint32_t snapshotted_time;
};
196 | |||
197 | /*---------------------------------------------------------------- | ||
198 | * superblock validator | ||
199 | *--------------------------------------------------------------*/ | ||
200 | |||
/* Third argument given to dm_bm_checksum() when checksumming the superblock. */
#define SUPERBLOCK_CSUM_XOR 160774
202 | |||
203 | static void sb_prepare_for_write(struct dm_block_validator *v, | ||
204 | struct dm_block *b, | ||
205 | size_t block_size) | ||
206 | { | ||
207 | struct thin_disk_superblock *disk_super = dm_block_data(b); | ||
208 | |||
209 | disk_super->blocknr = cpu_to_le64(dm_block_location(b)); | ||
210 | disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, | ||
211 | block_size - sizeof(__le32), | ||
212 | SUPERBLOCK_CSUM_XOR)); | ||
213 | } | ||
214 | |||
215 | static int sb_check(struct dm_block_validator *v, | ||
216 | struct dm_block *b, | ||
217 | size_t block_size) | ||
218 | { | ||
219 | struct thin_disk_superblock *disk_super = dm_block_data(b); | ||
220 | __le32 csum_le; | ||
221 | |||
222 | if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { | ||
223 | DMERR("sb_check failed: blocknr %llu: " | ||
224 | "wanted %llu", le64_to_cpu(disk_super->blocknr), | ||
225 | (unsigned long long)dm_block_location(b)); | ||
226 | return -ENOTBLK; | ||
227 | } | ||
228 | |||
229 | if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) { | ||
230 | DMERR("sb_check failed: magic %llu: " | ||
231 | "wanted %llu", le64_to_cpu(disk_super->magic), | ||
232 | (unsigned long long)THIN_SUPERBLOCK_MAGIC); | ||
233 | return -EILSEQ; | ||
234 | } | ||
235 | |||
236 | csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, | ||
237 | block_size - sizeof(__le32), | ||
238 | SUPERBLOCK_CSUM_XOR)); | ||
239 | if (csum_le != disk_super->csum) { | ||
240 | DMERR("sb_check failed: csum %u: wanted %u", | ||
241 | le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); | ||
242 | return -EILSEQ; | ||
243 | } | ||
244 | |||
245 | return 0; | ||
246 | } | ||
247 | |||
/*
 * Validator passed to the block manager and transaction manager whenever
 * the superblock is locked: sb_check() runs on read, sb_prepare_for_write()
 * before the block is written.
 */
static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
253 | |||
254 | /*---------------------------------------------------------------- | ||
255 | * Methods for the btree value types | ||
256 | *--------------------------------------------------------------*/ | ||
257 | |||
258 | static uint64_t pack_block_time(dm_block_t b, uint32_t t) | ||
259 | { | ||
260 | return (b << 24) | t; | ||
261 | } | ||
262 | |||
263 | static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) | ||
264 | { | ||
265 | *b = v >> 24; | ||
266 | *t = v & ((1 << 24) - 1); | ||
267 | } | ||
268 | |||
269 | static void data_block_inc(void *context, void *value_le) | ||
270 | { | ||
271 | struct dm_space_map *sm = context; | ||
272 | __le64 v_le; | ||
273 | uint64_t b; | ||
274 | uint32_t t; | ||
275 | |||
276 | memcpy(&v_le, value_le, sizeof(v_le)); | ||
277 | unpack_block_time(le64_to_cpu(v_le), &b, &t); | ||
278 | dm_sm_inc_block(sm, b); | ||
279 | } | ||
280 | |||
281 | static void data_block_dec(void *context, void *value_le) | ||
282 | { | ||
283 | struct dm_space_map *sm = context; | ||
284 | __le64 v_le; | ||
285 | uint64_t b; | ||
286 | uint32_t t; | ||
287 | |||
288 | memcpy(&v_le, value_le, sizeof(v_le)); | ||
289 | unpack_block_time(le64_to_cpu(v_le), &b, &t); | ||
290 | dm_sm_dec_block(sm, b); | ||
291 | } | ||
292 | |||
293 | static int data_block_equal(void *context, void *value1_le, void *value2_le) | ||
294 | { | ||
295 | __le64 v1_le, v2_le; | ||
296 | uint64_t b1, b2; | ||
297 | uint32_t t; | ||
298 | |||
299 | memcpy(&v1_le, value1_le, sizeof(v1_le)); | ||
300 | memcpy(&v2_le, value2_le, sizeof(v2_le)); | ||
301 | unpack_block_time(le64_to_cpu(v1_le), &b1, &t); | ||
302 | unpack_block_time(le64_to_cpu(v2_le), &b2, &t); | ||
303 | |||
304 | return b1 == b2; | ||
305 | } | ||
306 | |||
307 | static void subtree_inc(void *context, void *value) | ||
308 | { | ||
309 | struct dm_btree_info *info = context; | ||
310 | __le64 root_le; | ||
311 | uint64_t root; | ||
312 | |||
313 | memcpy(&root_le, value, sizeof(root_le)); | ||
314 | root = le64_to_cpu(root_le); | ||
315 | dm_tm_inc(info->tm, root); | ||
316 | } | ||
317 | |||
318 | static void subtree_dec(void *context, void *value) | ||
319 | { | ||
320 | struct dm_btree_info *info = context; | ||
321 | __le64 root_le; | ||
322 | uint64_t root; | ||
323 | |||
324 | memcpy(&root_le, value, sizeof(root_le)); | ||
325 | root = le64_to_cpu(root_le); | ||
326 | if (dm_btree_del(info, root)) | ||
327 | DMERR("btree delete failed\n"); | ||
328 | } | ||
329 | |||
330 | static int subtree_equal(void *context, void *value1_le, void *value2_le) | ||
331 | { | ||
332 | __le64 v1_le, v2_le; | ||
333 | memcpy(&v1_le, value1_le, sizeof(v1_le)); | ||
334 | memcpy(&v2_le, value2_le, sizeof(v2_le)); | ||
335 | |||
336 | return v1_le == v2_le; | ||
337 | } | ||
338 | |||
339 | /*----------------------------------------------------------------*/ | ||
340 | |||
341 | static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) | ||
342 | { | ||
343 | int r; | ||
344 | unsigned i; | ||
345 | struct dm_block *b; | ||
346 | __le64 *data_le, zero = cpu_to_le64(0); | ||
347 | unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64); | ||
348 | |||
349 | /* | ||
350 | * We can't use a validator here - it may be all zeroes. | ||
351 | */ | ||
352 | r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b); | ||
353 | if (r) | ||
354 | return r; | ||
355 | |||
356 | data_le = dm_block_data(b); | ||
357 | *result = 1; | ||
358 | for (i = 0; i < block_size; i++) { | ||
359 | if (data_le[i] != zero) { | ||
360 | *result = 0; | ||
361 | break; | ||
362 | } | ||
363 | } | ||
364 | |||
365 | return dm_bm_unlock(b); | ||
366 | } | ||
367 | |||
368 | static int init_pmd(struct dm_pool_metadata *pmd, | ||
369 | struct dm_block_manager *bm, | ||
370 | dm_block_t nr_blocks, int create) | ||
371 | { | ||
372 | int r; | ||
373 | struct dm_space_map *sm, *data_sm; | ||
374 | struct dm_transaction_manager *tm; | ||
375 | struct dm_block *sblock; | ||
376 | |||
377 | if (create) { | ||
378 | r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, | ||
379 | &sb_validator, &tm, &sm, &sblock); | ||
380 | if (r < 0) { | ||
381 | DMERR("tm_create_with_sm failed"); | ||
382 | return r; | ||
383 | } | ||
384 | |||
385 | data_sm = dm_sm_disk_create(tm, nr_blocks); | ||
386 | if (IS_ERR(data_sm)) { | ||
387 | DMERR("sm_disk_create failed"); | ||
388 | r = PTR_ERR(data_sm); | ||
389 | goto bad; | ||
390 | } | ||
391 | } else { | ||
392 | struct thin_disk_superblock *disk_super = NULL; | ||
393 | size_t space_map_root_offset = | ||
394 | offsetof(struct thin_disk_superblock, metadata_space_map_root); | ||
395 | |||
396 | r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, | ||
397 | &sb_validator, space_map_root_offset, | ||
398 | SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); | ||
399 | if (r < 0) { | ||
400 | DMERR("tm_open_with_sm failed"); | ||
401 | return r; | ||
402 | } | ||
403 | |||
404 | disk_super = dm_block_data(sblock); | ||
405 | data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, | ||
406 | sizeof(disk_super->data_space_map_root)); | ||
407 | if (IS_ERR(data_sm)) { | ||
408 | DMERR("sm_disk_open failed"); | ||
409 | r = PTR_ERR(data_sm); | ||
410 | goto bad; | ||
411 | } | ||
412 | } | ||
413 | |||
414 | |||
415 | r = dm_tm_unlock(tm, sblock); | ||
416 | if (r < 0) { | ||
417 | DMERR("couldn't unlock superblock"); | ||
418 | goto bad_data_sm; | ||
419 | } | ||
420 | |||
421 | pmd->bm = bm; | ||
422 | pmd->metadata_sm = sm; | ||
423 | pmd->data_sm = data_sm; | ||
424 | pmd->tm = tm; | ||
425 | pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); | ||
426 | if (!pmd->nb_tm) { | ||
427 | DMERR("could not create clone tm"); | ||
428 | r = -ENOMEM; | ||
429 | goto bad_data_sm; | ||
430 | } | ||
431 | |||
432 | pmd->info.tm = tm; | ||
433 | pmd->info.levels = 2; | ||
434 | pmd->info.value_type.context = pmd->data_sm; | ||
435 | pmd->info.value_type.size = sizeof(__le64); | ||
436 | pmd->info.value_type.inc = data_block_inc; | ||
437 | pmd->info.value_type.dec = data_block_dec; | ||
438 | pmd->info.value_type.equal = data_block_equal; | ||
439 | |||
440 | memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); | ||
441 | pmd->nb_info.tm = pmd->nb_tm; | ||
442 | |||
443 | pmd->tl_info.tm = tm; | ||
444 | pmd->tl_info.levels = 1; | ||
445 | pmd->tl_info.value_type.context = &pmd->info; | ||
446 | pmd->tl_info.value_type.size = sizeof(__le64); | ||
447 | pmd->tl_info.value_type.inc = subtree_inc; | ||
448 | pmd->tl_info.value_type.dec = subtree_dec; | ||
449 | pmd->tl_info.value_type.equal = subtree_equal; | ||
450 | |||
451 | pmd->bl_info.tm = tm; | ||
452 | pmd->bl_info.levels = 1; | ||
453 | pmd->bl_info.value_type.context = pmd->data_sm; | ||
454 | pmd->bl_info.value_type.size = sizeof(__le64); | ||
455 | pmd->bl_info.value_type.inc = data_block_inc; | ||
456 | pmd->bl_info.value_type.dec = data_block_dec; | ||
457 | pmd->bl_info.value_type.equal = data_block_equal; | ||
458 | |||
459 | pmd->details_info.tm = tm; | ||
460 | pmd->details_info.levels = 1; | ||
461 | pmd->details_info.value_type.context = NULL; | ||
462 | pmd->details_info.value_type.size = sizeof(struct disk_device_details); | ||
463 | pmd->details_info.value_type.inc = NULL; | ||
464 | pmd->details_info.value_type.dec = NULL; | ||
465 | pmd->details_info.value_type.equal = NULL; | ||
466 | |||
467 | pmd->root = 0; | ||
468 | |||
469 | init_rwsem(&pmd->root_lock); | ||
470 | pmd->time = 0; | ||
471 | pmd->need_commit = 0; | ||
472 | pmd->details_root = 0; | ||
473 | pmd->trans_id = 0; | ||
474 | pmd->flags = 0; | ||
475 | INIT_LIST_HEAD(&pmd->thin_devices); | ||
476 | |||
477 | return 0; | ||
478 | |||
479 | bad_data_sm: | ||
480 | dm_sm_destroy(data_sm); | ||
481 | bad: | ||
482 | dm_tm_destroy(tm); | ||
483 | dm_sm_destroy(sm); | ||
484 | |||
485 | return r; | ||
486 | } | ||
487 | |||
488 | static int __begin_transaction(struct dm_pool_metadata *pmd) | ||
489 | { | ||
490 | int r; | ||
491 | u32 features; | ||
492 | struct thin_disk_superblock *disk_super; | ||
493 | struct dm_block *sblock; | ||
494 | |||
495 | /* | ||
496 | * __maybe_commit_transaction() resets these | ||
497 | */ | ||
498 | WARN_ON(pmd->need_commit); | ||
499 | |||
500 | /* | ||
501 | * We re-read the superblock every time. Shouldn't need to do this | ||
502 | * really. | ||
503 | */ | ||
504 | r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
505 | &sb_validator, &sblock); | ||
506 | if (r) | ||
507 | return r; | ||
508 | |||
509 | disk_super = dm_block_data(sblock); | ||
510 | pmd->time = le32_to_cpu(disk_super->time); | ||
511 | pmd->root = le64_to_cpu(disk_super->data_mapping_root); | ||
512 | pmd->details_root = le64_to_cpu(disk_super->device_details_root); | ||
513 | pmd->trans_id = le64_to_cpu(disk_super->trans_id); | ||
514 | pmd->flags = le32_to_cpu(disk_super->flags); | ||
515 | pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); | ||
516 | |||
517 | features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; | ||
518 | if (features) { | ||
519 | DMERR("could not access metadata due to " | ||
520 | "unsupported optional features (%lx).", | ||
521 | (unsigned long)features); | ||
522 | r = -EINVAL; | ||
523 | goto out; | ||
524 | } | ||
525 | |||
526 | /* | ||
527 | * Check for read-only metadata to skip the following RDWR checks. | ||
528 | */ | ||
529 | if (get_disk_ro(pmd->bdev->bd_disk)) | ||
530 | goto out; | ||
531 | |||
532 | features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; | ||
533 | if (features) { | ||
534 | DMERR("could not access metadata RDWR due to " | ||
535 | "unsupported optional features (%lx).", | ||
536 | (unsigned long)features); | ||
537 | r = -EINVAL; | ||
538 | } | ||
539 | |||
540 | out: | ||
541 | dm_bm_unlock(sblock); | ||
542 | return r; | ||
543 | } | ||
544 | |||
545 | static int __write_changed_details(struct dm_pool_metadata *pmd) | ||
546 | { | ||
547 | int r; | ||
548 | struct dm_thin_device *td, *tmp; | ||
549 | struct disk_device_details details; | ||
550 | uint64_t key; | ||
551 | |||
552 | list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { | ||
553 | if (!td->changed) | ||
554 | continue; | ||
555 | |||
556 | key = td->id; | ||
557 | |||
558 | details.mapped_blocks = cpu_to_le64(td->mapped_blocks); | ||
559 | details.transaction_id = cpu_to_le64(td->transaction_id); | ||
560 | details.creation_time = cpu_to_le32(td->creation_time); | ||
561 | details.snapshotted_time = cpu_to_le32(td->snapshotted_time); | ||
562 | __dm_bless_for_disk(&details); | ||
563 | |||
564 | r = dm_btree_insert(&pmd->details_info, pmd->details_root, | ||
565 | &key, &details, &pmd->details_root); | ||
566 | if (r) | ||
567 | return r; | ||
568 | |||
569 | if (td->open_count) | ||
570 | td->changed = 0; | ||
571 | else { | ||
572 | list_del(&td->list); | ||
573 | kfree(td); | ||
574 | } | ||
575 | |||
576 | pmd->need_commit = 1; | ||
577 | } | ||
578 | |||
579 | return 0; | ||
580 | } | ||
581 | |||
582 | static int __commit_transaction(struct dm_pool_metadata *pmd) | ||
583 | { | ||
584 | /* | ||
585 | * FIXME: Associated pool should be made read-only on failure. | ||
586 | */ | ||
587 | int r; | ||
588 | size_t metadata_len, data_len; | ||
589 | struct thin_disk_superblock *disk_super; | ||
590 | struct dm_block *sblock; | ||
591 | |||
592 | /* | ||
593 | * We need to know if the thin_disk_superblock exceeds a 512-byte sector. | ||
594 | */ | ||
595 | BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); | ||
596 | |||
597 | r = __write_changed_details(pmd); | ||
598 | if (r < 0) | ||
599 | goto out; | ||
600 | |||
601 | if (!pmd->need_commit) | ||
602 | goto out; | ||
603 | |||
604 | r = dm_sm_commit(pmd->data_sm); | ||
605 | if (r < 0) | ||
606 | goto out; | ||
607 | |||
608 | r = dm_tm_pre_commit(pmd->tm); | ||
609 | if (r < 0) | ||
610 | goto out; | ||
611 | |||
612 | r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); | ||
613 | if (r < 0) | ||
614 | goto out; | ||
615 | |||
616 | r = dm_sm_root_size(pmd->metadata_sm, &data_len); | ||
617 | if (r < 0) | ||
618 | goto out; | ||
619 | |||
620 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
621 | &sb_validator, &sblock); | ||
622 | if (r) | ||
623 | goto out; | ||
624 | |||
625 | disk_super = dm_block_data(sblock); | ||
626 | disk_super->time = cpu_to_le32(pmd->time); | ||
627 | disk_super->data_mapping_root = cpu_to_le64(pmd->root); | ||
628 | disk_super->device_details_root = cpu_to_le64(pmd->details_root); | ||
629 | disk_super->trans_id = cpu_to_le64(pmd->trans_id); | ||
630 | disk_super->flags = cpu_to_le32(pmd->flags); | ||
631 | |||
632 | r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, | ||
633 | metadata_len); | ||
634 | if (r < 0) | ||
635 | goto out_locked; | ||
636 | |||
637 | r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, | ||
638 | data_len); | ||
639 | if (r < 0) | ||
640 | goto out_locked; | ||
641 | |||
642 | r = dm_tm_commit(pmd->tm, sblock); | ||
643 | if (!r) | ||
644 | pmd->need_commit = 0; | ||
645 | |||
646 | out: | ||
647 | return r; | ||
648 | |||
649 | out_locked: | ||
650 | dm_bm_unlock(sblock); | ||
651 | return r; | ||
652 | } | ||
653 | |||
654 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, | ||
655 | sector_t data_block_size) | ||
656 | { | ||
657 | int r; | ||
658 | struct thin_disk_superblock *disk_super; | ||
659 | struct dm_pool_metadata *pmd; | ||
660 | sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; | ||
661 | struct dm_block_manager *bm; | ||
662 | int create; | ||
663 | struct dm_block *sblock; | ||
664 | |||
665 | pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); | ||
666 | if (!pmd) { | ||
667 | DMERR("could not allocate metadata struct"); | ||
668 | return ERR_PTR(-ENOMEM); | ||
669 | } | ||
670 | |||
671 | /* | ||
672 | * Max hex locks: | ||
673 | * 3 for btree insert + | ||
674 | * 2 for btree lookup used within space map | ||
675 | */ | ||
676 | bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, | ||
677 | THIN_METADATA_CACHE_SIZE, 5); | ||
678 | if (!bm) { | ||
679 | DMERR("could not create block manager"); | ||
680 | kfree(pmd); | ||
681 | return ERR_PTR(-ENOMEM); | ||
682 | } | ||
683 | |||
684 | r = superblock_all_zeroes(bm, &create); | ||
685 | if (r) { | ||
686 | dm_block_manager_destroy(bm); | ||
687 | kfree(pmd); | ||
688 | return ERR_PTR(r); | ||
689 | } | ||
690 | |||
691 | |||
692 | r = init_pmd(pmd, bm, 0, create); | ||
693 | if (r) { | ||
694 | dm_block_manager_destroy(bm); | ||
695 | kfree(pmd); | ||
696 | return ERR_PTR(r); | ||
697 | } | ||
698 | pmd->bdev = bdev; | ||
699 | |||
700 | if (!create) { | ||
701 | r = __begin_transaction(pmd); | ||
702 | if (r < 0) | ||
703 | goto bad; | ||
704 | return pmd; | ||
705 | } | ||
706 | |||
707 | /* | ||
708 | * Create. | ||
709 | */ | ||
710 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
711 | &sb_validator, &sblock); | ||
712 | if (r) | ||
713 | goto bad; | ||
714 | |||
715 | disk_super = dm_block_data(sblock); | ||
716 | disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); | ||
717 | disk_super->version = cpu_to_le32(THIN_VERSION); | ||
718 | disk_super->time = 0; | ||
719 | disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | ||
720 | disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); | ||
721 | disk_super->data_block_size = cpu_to_le32(data_block_size); | ||
722 | |||
723 | r = dm_bm_unlock(sblock); | ||
724 | if (r < 0) | ||
725 | goto bad; | ||
726 | |||
727 | r = dm_btree_empty(&pmd->info, &pmd->root); | ||
728 | if (r < 0) | ||
729 | goto bad; | ||
730 | |||
731 | r = dm_btree_empty(&pmd->details_info, &pmd->details_root); | ||
732 | if (r < 0) { | ||
733 | DMERR("couldn't create devices root"); | ||
734 | goto bad; | ||
735 | } | ||
736 | |||
737 | pmd->flags = 0; | ||
738 | pmd->need_commit = 1; | ||
739 | r = dm_pool_commit_metadata(pmd); | ||
740 | if (r < 0) { | ||
741 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
742 | __func__, r); | ||
743 | goto bad; | ||
744 | } | ||
745 | |||
746 | return pmd; | ||
747 | |||
748 | bad: | ||
749 | if (dm_pool_metadata_close(pmd) < 0) | ||
750 | DMWARN("%s: dm_pool_metadata_close() failed.", __func__); | ||
751 | return ERR_PTR(r); | ||
752 | } | ||
753 | |||
754 | int dm_pool_metadata_close(struct dm_pool_metadata *pmd) | ||
755 | { | ||
756 | int r; | ||
757 | unsigned open_devices = 0; | ||
758 | struct dm_thin_device *td, *tmp; | ||
759 | |||
760 | down_read(&pmd->root_lock); | ||
761 | list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { | ||
762 | if (td->open_count) | ||
763 | open_devices++; | ||
764 | else { | ||
765 | list_del(&td->list); | ||
766 | kfree(td); | ||
767 | } | ||
768 | } | ||
769 | up_read(&pmd->root_lock); | ||
770 | |||
771 | if (open_devices) { | ||
772 | DMERR("attempt to close pmd when %u device(s) are still open", | ||
773 | open_devices); | ||
774 | return -EBUSY; | ||
775 | } | ||
776 | |||
777 | r = __commit_transaction(pmd); | ||
778 | if (r < 0) | ||
779 | DMWARN("%s: __commit_transaction() failed, error = %d", | ||
780 | __func__, r); | ||
781 | |||
782 | dm_tm_destroy(pmd->tm); | ||
783 | dm_tm_destroy(pmd->nb_tm); | ||
784 | dm_block_manager_destroy(pmd->bm); | ||
785 | dm_sm_destroy(pmd->metadata_sm); | ||
786 | dm_sm_destroy(pmd->data_sm); | ||
787 | kfree(pmd); | ||
788 | |||
789 | return 0; | ||
790 | } | ||
791 | |||
792 | static int __open_device(struct dm_pool_metadata *pmd, | ||
793 | dm_thin_id dev, int create, | ||
794 | struct dm_thin_device **td) | ||
795 | { | ||
796 | int r, changed = 0; | ||
797 | struct dm_thin_device *td2; | ||
798 | uint64_t key = dev; | ||
799 | struct disk_device_details details_le; | ||
800 | |||
801 | /* | ||
802 | * Check the device isn't already open. | ||
803 | */ | ||
804 | list_for_each_entry(td2, &pmd->thin_devices, list) | ||
805 | if (td2->id == dev) { | ||
806 | td2->open_count++; | ||
807 | *td = td2; | ||
808 | return 0; | ||
809 | } | ||
810 | |||
811 | /* | ||
812 | * Check the device exists. | ||
813 | */ | ||
814 | r = dm_btree_lookup(&pmd->details_info, pmd->details_root, | ||
815 | &key, &details_le); | ||
816 | if (r) { | ||
817 | if (r != -ENODATA || !create) | ||
818 | return r; | ||
819 | |||
820 | changed = 1; | ||
821 | details_le.mapped_blocks = 0; | ||
822 | details_le.transaction_id = cpu_to_le64(pmd->trans_id); | ||
823 | details_le.creation_time = cpu_to_le32(pmd->time); | ||
824 | details_le.snapshotted_time = cpu_to_le32(pmd->time); | ||
825 | } | ||
826 | |||
827 | *td = kmalloc(sizeof(**td), GFP_NOIO); | ||
828 | if (!*td) | ||
829 | return -ENOMEM; | ||
830 | |||
831 | (*td)->pmd = pmd; | ||
832 | (*td)->id = dev; | ||
833 | (*td)->open_count = 1; | ||
834 | (*td)->changed = changed; | ||
835 | (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); | ||
836 | (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); | ||
837 | (*td)->creation_time = le32_to_cpu(details_le.creation_time); | ||
838 | (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time); | ||
839 | |||
840 | list_add(&(*td)->list, &pmd->thin_devices); | ||
841 | |||
842 | return 0; | ||
843 | } | ||
844 | |||
845 | static void __close_device(struct dm_thin_device *td) | ||
846 | { | ||
847 | --td->open_count; | ||
848 | } | ||
849 | |||
850 | static int __create_thin(struct dm_pool_metadata *pmd, | ||
851 | dm_thin_id dev) | ||
852 | { | ||
853 | int r; | ||
854 | dm_block_t dev_root; | ||
855 | uint64_t key = dev; | ||
856 | struct disk_device_details details_le; | ||
857 | struct dm_thin_device *td; | ||
858 | __le64 value; | ||
859 | |||
860 | r = dm_btree_lookup(&pmd->details_info, pmd->details_root, | ||
861 | &key, &details_le); | ||
862 | if (!r) | ||
863 | return -EEXIST; | ||
864 | |||
865 | /* | ||
866 | * Create an empty btree for the mappings. | ||
867 | */ | ||
868 | r = dm_btree_empty(&pmd->bl_info, &dev_root); | ||
869 | if (r) | ||
870 | return r; | ||
871 | |||
872 | /* | ||
873 | * Insert it into the main mapping tree. | ||
874 | */ | ||
875 | value = cpu_to_le64(dev_root); | ||
876 | __dm_bless_for_disk(&value); | ||
877 | r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); | ||
878 | if (r) { | ||
879 | dm_btree_del(&pmd->bl_info, dev_root); | ||
880 | return r; | ||
881 | } | ||
882 | |||
883 | r = __open_device(pmd, dev, 1, &td); | ||
884 | if (r) { | ||
885 | __close_device(td); | ||
886 | dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); | ||
887 | dm_btree_del(&pmd->bl_info, dev_root); | ||
888 | return r; | ||
889 | } | ||
890 | td->changed = 1; | ||
891 | __close_device(td); | ||
892 | |||
893 | return r; | ||
894 | } | ||
895 | |||
896 | int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) | ||
897 | { | ||
898 | int r; | ||
899 | |||
900 | down_write(&pmd->root_lock); | ||
901 | r = __create_thin(pmd, dev); | ||
902 | up_write(&pmd->root_lock); | ||
903 | |||
904 | return r; | ||
905 | } | ||
906 | |||
907 | static int __set_snapshot_details(struct dm_pool_metadata *pmd, | ||
908 | struct dm_thin_device *snap, | ||
909 | dm_thin_id origin, uint32_t time) | ||
910 | { | ||
911 | int r; | ||
912 | struct dm_thin_device *td; | ||
913 | |||
914 | r = __open_device(pmd, origin, 0, &td); | ||
915 | if (r) | ||
916 | return r; | ||
917 | |||
918 | td->changed = 1; | ||
919 | td->snapshotted_time = time; | ||
920 | |||
921 | snap->mapped_blocks = td->mapped_blocks; | ||
922 | snap->snapshotted_time = time; | ||
923 | __close_device(td); | ||
924 | |||
925 | return 0; | ||
926 | } | ||
927 | |||
928 | static int __create_snap(struct dm_pool_metadata *pmd, | ||
929 | dm_thin_id dev, dm_thin_id origin) | ||
930 | { | ||
931 | int r; | ||
932 | dm_block_t origin_root; | ||
933 | uint64_t key = origin, dev_key = dev; | ||
934 | struct dm_thin_device *td; | ||
935 | struct disk_device_details details_le; | ||
936 | __le64 value; | ||
937 | |||
938 | /* check this device is unused */ | ||
939 | r = dm_btree_lookup(&pmd->details_info, pmd->details_root, | ||
940 | &dev_key, &details_le); | ||
941 | if (!r) | ||
942 | return -EEXIST; | ||
943 | |||
944 | /* find the mapping tree for the origin */ | ||
945 | r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value); | ||
946 | if (r) | ||
947 | return r; | ||
948 | origin_root = le64_to_cpu(value); | ||
949 | |||
950 | /* clone the origin, an inc will do */ | ||
951 | dm_tm_inc(pmd->tm, origin_root); | ||
952 | |||
953 | /* insert into the main mapping tree */ | ||
954 | value = cpu_to_le64(origin_root); | ||
955 | __dm_bless_for_disk(&value); | ||
956 | key = dev; | ||
957 | r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); | ||
958 | if (r) { | ||
959 | dm_tm_dec(pmd->tm, origin_root); | ||
960 | return r; | ||
961 | } | ||
962 | |||
963 | pmd->time++; | ||
964 | |||
965 | r = __open_device(pmd, dev, 1, &td); | ||
966 | if (r) | ||
967 | goto bad; | ||
968 | |||
969 | r = __set_snapshot_details(pmd, td, origin, pmd->time); | ||
970 | if (r) | ||
971 | goto bad; | ||
972 | |||
973 | __close_device(td); | ||
974 | return 0; | ||
975 | |||
976 | bad: | ||
977 | __close_device(td); | ||
978 | dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); | ||
979 | dm_btree_remove(&pmd->details_info, pmd->details_root, | ||
980 | &key, &pmd->details_root); | ||
981 | return r; | ||
982 | } | ||
983 | |||
984 | int dm_pool_create_snap(struct dm_pool_metadata *pmd, | ||
985 | dm_thin_id dev, | ||
986 | dm_thin_id origin) | ||
987 | { | ||
988 | int r; | ||
989 | |||
990 | down_write(&pmd->root_lock); | ||
991 | r = __create_snap(pmd, dev, origin); | ||
992 | up_write(&pmd->root_lock); | ||
993 | |||
994 | return r; | ||
995 | } | ||
996 | |||
997 | static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) | ||
998 | { | ||
999 | int r; | ||
1000 | uint64_t key = dev; | ||
1001 | struct dm_thin_device *td; | ||
1002 | |||
1003 | /* TODO: failure should mark the transaction invalid */ | ||
1004 | r = __open_device(pmd, dev, 0, &td); | ||
1005 | if (r) | ||
1006 | return r; | ||
1007 | |||
1008 | if (td->open_count > 1) { | ||
1009 | __close_device(td); | ||
1010 | return -EBUSY; | ||
1011 | } | ||
1012 | |||
1013 | list_del(&td->list); | ||
1014 | kfree(td); | ||
1015 | r = dm_btree_remove(&pmd->details_info, pmd->details_root, | ||
1016 | &key, &pmd->details_root); | ||
1017 | if (r) | ||
1018 | return r; | ||
1019 | |||
1020 | r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); | ||
1021 | if (r) | ||
1022 | return r; | ||
1023 | |||
1024 | pmd->need_commit = 1; | ||
1025 | |||
1026 | return 0; | ||
1027 | } | ||
1028 | |||
1029 | int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, | ||
1030 | dm_thin_id dev) | ||
1031 | { | ||
1032 | int r; | ||
1033 | |||
1034 | down_write(&pmd->root_lock); | ||
1035 | r = __delete_device(pmd, dev); | ||
1036 | up_write(&pmd->root_lock); | ||
1037 | |||
1038 | return r; | ||
1039 | } | ||
1040 | |||
1041 | int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, | ||
1042 | uint64_t current_id, | ||
1043 | uint64_t new_id) | ||
1044 | { | ||
1045 | down_write(&pmd->root_lock); | ||
1046 | if (pmd->trans_id != current_id) { | ||
1047 | up_write(&pmd->root_lock); | ||
1048 | DMERR("mismatched transaction id"); | ||
1049 | return -EINVAL; | ||
1050 | } | ||
1051 | |||
1052 | pmd->trans_id = new_id; | ||
1053 | pmd->need_commit = 1; | ||
1054 | up_write(&pmd->root_lock); | ||
1055 | |||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, | ||
1060 | uint64_t *result) | ||
1061 | { | ||
1062 | down_read(&pmd->root_lock); | ||
1063 | *result = pmd->trans_id; | ||
1064 | up_read(&pmd->root_lock); | ||
1065 | |||
1066 | return 0; | ||
1067 | } | ||
1068 | |||
1069 | static int __get_held_metadata_root(struct dm_pool_metadata *pmd, | ||
1070 | dm_block_t *result) | ||
1071 | { | ||
1072 | int r; | ||
1073 | struct thin_disk_superblock *disk_super; | ||
1074 | struct dm_block *sblock; | ||
1075 | |||
1076 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
1077 | &sb_validator, &sblock); | ||
1078 | if (r) | ||
1079 | return r; | ||
1080 | |||
1081 | disk_super = dm_block_data(sblock); | ||
1082 | *result = le64_to_cpu(disk_super->held_root); | ||
1083 | |||
1084 | return dm_bm_unlock(sblock); | ||
1085 | } | ||
1086 | |||
1087 | int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, | ||
1088 | dm_block_t *result) | ||
1089 | { | ||
1090 | int r; | ||
1091 | |||
1092 | down_read(&pmd->root_lock); | ||
1093 | r = __get_held_metadata_root(pmd, result); | ||
1094 | up_read(&pmd->root_lock); | ||
1095 | |||
1096 | return r; | ||
1097 | } | ||
1098 | |||
1099 | int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, | ||
1100 | struct dm_thin_device **td) | ||
1101 | { | ||
1102 | int r; | ||
1103 | |||
1104 | down_write(&pmd->root_lock); | ||
1105 | r = __open_device(pmd, dev, 0, td); | ||
1106 | up_write(&pmd->root_lock); | ||
1107 | |||
1108 | return r; | ||
1109 | } | ||
1110 | |||
1111 | int dm_pool_close_thin_device(struct dm_thin_device *td) | ||
1112 | { | ||
1113 | down_write(&td->pmd->root_lock); | ||
1114 | __close_device(td); | ||
1115 | up_write(&td->pmd->root_lock); | ||
1116 | |||
1117 | return 0; | ||
1118 | } | ||
1119 | |||
1120 | dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) | ||
1121 | { | ||
1122 | return td->id; | ||
1123 | } | ||
1124 | |||
1125 | static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) | ||
1126 | { | ||
1127 | return td->snapshotted_time > time; | ||
1128 | } | ||
1129 | |||
1130 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, | ||
1131 | int can_block, struct dm_thin_lookup_result *result) | ||
1132 | { | ||
1133 | int r; | ||
1134 | uint64_t block_time = 0; | ||
1135 | __le64 value; | ||
1136 | struct dm_pool_metadata *pmd = td->pmd; | ||
1137 | dm_block_t keys[2] = { td->id, block }; | ||
1138 | |||
1139 | if (can_block) { | ||
1140 | down_read(&pmd->root_lock); | ||
1141 | r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); | ||
1142 | if (!r) | ||
1143 | block_time = le64_to_cpu(value); | ||
1144 | up_read(&pmd->root_lock); | ||
1145 | |||
1146 | } else if (down_read_trylock(&pmd->root_lock)) { | ||
1147 | r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); | ||
1148 | if (!r) | ||
1149 | block_time = le64_to_cpu(value); | ||
1150 | up_read(&pmd->root_lock); | ||
1151 | |||
1152 | } else | ||
1153 | return -EWOULDBLOCK; | ||
1154 | |||
1155 | if (!r) { | ||
1156 | dm_block_t exception_block; | ||
1157 | uint32_t exception_time; | ||
1158 | unpack_block_time(block_time, &exception_block, | ||
1159 | &exception_time); | ||
1160 | result->block = exception_block; | ||
1161 | result->shared = __snapshotted_since(td, exception_time); | ||
1162 | } | ||
1163 | |||
1164 | return r; | ||
1165 | } | ||
1166 | |||
1167 | static int __insert(struct dm_thin_device *td, dm_block_t block, | ||
1168 | dm_block_t data_block) | ||
1169 | { | ||
1170 | int r, inserted; | ||
1171 | __le64 value; | ||
1172 | struct dm_pool_metadata *pmd = td->pmd; | ||
1173 | dm_block_t keys[2] = { td->id, block }; | ||
1174 | |||
1175 | pmd->need_commit = 1; | ||
1176 | value = cpu_to_le64(pack_block_time(data_block, pmd->time)); | ||
1177 | __dm_bless_for_disk(&value); | ||
1178 | |||
1179 | r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value, | ||
1180 | &pmd->root, &inserted); | ||
1181 | if (r) | ||
1182 | return r; | ||
1183 | |||
1184 | if (inserted) { | ||
1185 | td->mapped_blocks++; | ||
1186 | td->changed = 1; | ||
1187 | } | ||
1188 | |||
1189 | return 0; | ||
1190 | } | ||
1191 | |||
1192 | int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, | ||
1193 | dm_block_t data_block) | ||
1194 | { | ||
1195 | int r; | ||
1196 | |||
1197 | down_write(&td->pmd->root_lock); | ||
1198 | r = __insert(td, block, data_block); | ||
1199 | up_write(&td->pmd->root_lock); | ||
1200 | |||
1201 | return r; | ||
1202 | } | ||
1203 | |||
1204 | static int __remove(struct dm_thin_device *td, dm_block_t block) | ||
1205 | { | ||
1206 | int r; | ||
1207 | struct dm_pool_metadata *pmd = td->pmd; | ||
1208 | dm_block_t keys[2] = { td->id, block }; | ||
1209 | |||
1210 | r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root); | ||
1211 | if (r) | ||
1212 | return r; | ||
1213 | |||
1214 | pmd->need_commit = 1; | ||
1215 | |||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) | ||
1220 | { | ||
1221 | int r; | ||
1222 | |||
1223 | down_write(&td->pmd->root_lock); | ||
1224 | r = __remove(td, block); | ||
1225 | up_write(&td->pmd->root_lock); | ||
1226 | |||
1227 | return r; | ||
1228 | } | ||
1229 | |||
1230 | int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) | ||
1231 | { | ||
1232 | int r; | ||
1233 | |||
1234 | down_write(&pmd->root_lock); | ||
1235 | |||
1236 | r = dm_sm_new_block(pmd->data_sm, result); | ||
1237 | pmd->need_commit = 1; | ||
1238 | |||
1239 | up_write(&pmd->root_lock); | ||
1240 | |||
1241 | return r; | ||
1242 | } | ||
1243 | |||
1244 | int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) | ||
1245 | { | ||
1246 | int r; | ||
1247 | |||
1248 | down_write(&pmd->root_lock); | ||
1249 | |||
1250 | r = __commit_transaction(pmd); | ||
1251 | if (r <= 0) | ||
1252 | goto out; | ||
1253 | |||
1254 | /* | ||
1255 | * Open the next transaction. | ||
1256 | */ | ||
1257 | r = __begin_transaction(pmd); | ||
1258 | out: | ||
1259 | up_write(&pmd->root_lock); | ||
1260 | return r; | ||
1261 | } | ||
1262 | |||
1263 | int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) | ||
1264 | { | ||
1265 | int r; | ||
1266 | |||
1267 | down_read(&pmd->root_lock); | ||
1268 | r = dm_sm_get_nr_free(pmd->data_sm, result); | ||
1269 | up_read(&pmd->root_lock); | ||
1270 | |||
1271 | return r; | ||
1272 | } | ||
1273 | |||
1274 | int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, | ||
1275 | dm_block_t *result) | ||
1276 | { | ||
1277 | int r; | ||
1278 | |||
1279 | down_read(&pmd->root_lock); | ||
1280 | r = dm_sm_get_nr_free(pmd->metadata_sm, result); | ||
1281 | up_read(&pmd->root_lock); | ||
1282 | |||
1283 | return r; | ||
1284 | } | ||
1285 | |||
1286 | int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, | ||
1287 | dm_block_t *result) | ||
1288 | { | ||
1289 | int r; | ||
1290 | |||
1291 | down_read(&pmd->root_lock); | ||
1292 | r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); | ||
1293 | up_read(&pmd->root_lock); | ||
1294 | |||
1295 | return r; | ||
1296 | } | ||
1297 | |||
1298 | int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) | ||
1299 | { | ||
1300 | down_read(&pmd->root_lock); | ||
1301 | *result = pmd->data_block_size; | ||
1302 | up_read(&pmd->root_lock); | ||
1303 | |||
1304 | return 0; | ||
1305 | } | ||
1306 | |||
1307 | int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) | ||
1308 | { | ||
1309 | int r; | ||
1310 | |||
1311 | down_read(&pmd->root_lock); | ||
1312 | r = dm_sm_get_nr_blocks(pmd->data_sm, result); | ||
1313 | up_read(&pmd->root_lock); | ||
1314 | |||
1315 | return r; | ||
1316 | } | ||
1317 | |||
1318 | int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) | ||
1319 | { | ||
1320 | struct dm_pool_metadata *pmd = td->pmd; | ||
1321 | |||
1322 | down_read(&pmd->root_lock); | ||
1323 | *result = td->mapped_blocks; | ||
1324 | up_read(&pmd->root_lock); | ||
1325 | |||
1326 | return 0; | ||
1327 | } | ||
1328 | |||
1329 | static int __highest_block(struct dm_thin_device *td, dm_block_t *result) | ||
1330 | { | ||
1331 | int r; | ||
1332 | __le64 value_le; | ||
1333 | dm_block_t thin_root; | ||
1334 | struct dm_pool_metadata *pmd = td->pmd; | ||
1335 | |||
1336 | r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le); | ||
1337 | if (r) | ||
1338 | return r; | ||
1339 | |||
1340 | thin_root = le64_to_cpu(value_le); | ||
1341 | |||
1342 | return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result); | ||
1343 | } | ||
1344 | |||
1345 | int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, | ||
1346 | dm_block_t *result) | ||
1347 | { | ||
1348 | int r; | ||
1349 | struct dm_pool_metadata *pmd = td->pmd; | ||
1350 | |||
1351 | down_read(&pmd->root_lock); | ||
1352 | r = __highest_block(td, result); | ||
1353 | up_read(&pmd->root_lock); | ||
1354 | |||
1355 | return r; | ||
1356 | } | ||
1357 | |||
1358 | static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) | ||
1359 | { | ||
1360 | int r; | ||
1361 | dm_block_t old_count; | ||
1362 | |||
1363 | r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); | ||
1364 | if (r) | ||
1365 | return r; | ||
1366 | |||
1367 | if (new_count == old_count) | ||
1368 | return 0; | ||
1369 | |||
1370 | if (new_count < old_count) { | ||
1371 | DMERR("cannot reduce size of data device"); | ||
1372 | return -EINVAL; | ||
1373 | } | ||
1374 | |||
1375 | r = dm_sm_extend(pmd->data_sm, new_count - old_count); | ||
1376 | if (!r) | ||
1377 | pmd->need_commit = 1; | ||
1378 | |||
1379 | return r; | ||
1380 | } | ||
1381 | |||
1382 | int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) | ||
1383 | { | ||
1384 | int r; | ||
1385 | |||
1386 | down_write(&pmd->root_lock); | ||
1387 | r = __resize_data_dev(pmd, new_count); | ||
1388 | up_write(&pmd->root_lock); | ||
1389 | |||
1390 | return r; | ||
1391 | } | ||