author     Alasdair G Kergon <agk@redhat.com>  2009-01-05 22:05:17 -0500
committer  Alasdair G Kergon <agk@redhat.com>  2009-01-05 22:05:17 -0500
commit     4db6bfe02bdc7dc5048f46dd682a94801d029adc
tree       780a41560ea05266288853204f0d7e4eef4f6355 /drivers/md
parent     1ae25f9c933d1432fbffdf3e126051a974608abf
dm snapshot: split out exception store implementations
Move the existing snapshot exception store implementations out into
separate files. Later patches will place these behind a new
interface in preparation for alternative implementations.
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Makefile               3
-rw-r--r--  drivers/md/dm-exception-store.c   749
-rw-r--r--  drivers/md/dm-exception-store.h   9
-rw-r--r--  drivers/md/dm-snap-persistent.c   694
-rw-r--r--  drivers/md/dm-snap-transient.c    95
-rw-r--r--  drivers/md/dm-snap.c              20
6 files changed, 833 insertions(+), 737 deletions(-)
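
For orientation before reading the diffs: after this patch, module start-up layers as dm_snapshot_init() -> dm_exception_store_init() -> the per-store init functions. Below is a minimal user-space sketch of that layering, condensed from the hunks that follow; the stub store inits stand in for the real ones, which at this point in the series simply return 0, and main() stands in for the kernel module hooks.

/*
 * Sketch only (not part of the commit): models the initialisation
 * layering this patch introduces.  Compile with any C compiler.
 */
#include <stdio.h>

/* Stubs for dm-snap-transient.c / dm-snap-persistent.c; in this patch
 * the real init functions also just return 0. */
static int dm_transient_snapshot_init(void)   { return 0; }
static void dm_transient_snapshot_exit(void)  { }
static int dm_persistent_snapshot_init(void)  { return 0; }
static void dm_persistent_snapshot_exit(void) { }

/* Mirrors dm_exception_store_init() in dm-exception-store.c below:
 * register the transient store, then the persistent one, unwinding
 * with gotos on failure. */
static int dm_exception_store_init(void)
{
	int r;

	r = dm_transient_snapshot_init();
	if (r)
		goto transient_fail;

	r = dm_persistent_snapshot_init();
	if (r)
		goto persistent_fail;

	return 0;

persistent_fail:
	dm_persistent_snapshot_exit();
transient_fail:
	return r;
}

/* Mirrors dm_exception_store_exit(): tear down in reverse order. */
static void dm_exception_store_exit(void)
{
	dm_persistent_snapshot_exit();
	dm_transient_snapshot_exit();
}

int main(void)
{
	/* As in dm_snapshot_init() in dm-snap.c after this patch. */
	if (dm_exception_store_init()) {
		fprintf(stderr, "Failed to initialize exception stores\n");
		return 1;
	}
	printf("exception stores initialised\n");

	/* And the module-exit path, as in dm_snapshot_exit(). */
	dm_exception_store_exit();
	return 0;
}

The goto-based unwind is copied from the patch verbatim; later patches in the series replace the stub inits with real exception store type registration behind the new interface.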
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 63f0ae94415d..72880b7e28d9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -5,7 +5,8 @@
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+		    dm-snap-persistent.o
 dm-mirror-objs	:= dm-raid1.o
 md-mod-objs	:= md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index c5c9a2652315..74777e0f80df 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,757 +1,46 @@
 /*
- * dm-exception-store.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- * Copyright (C) 2006 Red Hat GmbH
+ * Copyright (C) 2006-2008 Red Hat GmbH
  *
  * This file is released under the GPL.
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
-
-#define DM_MSG_PREFIX "snapshots"
-#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
-
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
-
-/*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device. The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store. It makes sense therefore, to store the
- * metadata in chunk size blocks.
- *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable. It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
- * constructor.
- *
- * The first chunk of the COW device just contains the header.
- * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
- *
- * All on disk structures are in little-endian format. The end
- * of the exceptions info is indicated by an exception with a
- * new_chunk of 0, which is invalid since it would point to the
- * header chunk.
- */
-
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata.
- */
-#define SNAPSHOT_DISK_VERSION 1
-
-struct disk_header {
-	uint32_t magic;
-
-	/*
-	 * Is this snapshot valid. There is no way of recovering
-	 * an invalid snapshot.
-	 */
-	uint32_t valid;
-
-	/*
-	 * Simple, incrementing version. no backward
-	 * compatibility.
-	 */
-	uint32_t version;
-
-	/* In sectors */
-	uint32_t chunk_size;
-};
-
-struct disk_exception {
-	uint64_t old_chunk;
-	uint64_t new_chunk;
-};
-
-struct commit_callback {
-	void (*callback)(void *, int success);
-	void *context;
-};
-
-/*
- * The top level structure for a persistent exception store.
- */
-struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
-	int version;
-	int valid;
-	uint32_t exceptions_per_area;
-
-	/*
-	 * Now that we have an asynchronous kcopyd there is no
-	 * need for large chunk sizes, so it wont hurt to have a
-	 * whole chunks worth of metadata in memory at once.
-	 */
-	void *area;
-
-	/*
-	 * An area of zeros used to clear the next area.
-	 */
-	void *zero_area;
-
-	/*
-	 * Used to keep track of which metadata area the data in
-	 * 'chunk' refers to.
-	 */
-	chunk_t current_area;
-
-	/*
-	 * The next free chunk for an exception.
-	 */
-	chunk_t next_free;
-
-	/*
-	 * The index of next free exception in the current
-	 * metadata area.
-	 */
-	uint32_t current_committed;
-
-	atomic_t pending_count;
-	uint32_t callback_count;
-	struct commit_callback *callbacks;
-	struct dm_io_client *io_client;
-
-	struct workqueue_struct *metadata_wq;
-};
-
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
-static int alloc_area(struct pstore *ps)
-{
-	int r = -ENOMEM;
-	size_t len;
-
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
-
-	/*
-	 * Allocate the chunk_size block of memory that will hold
-	 * a single metadata area.
-	 */
-	ps->area = vmalloc(len);
-	if (!ps->area)
-		return r;
-
-	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
-	memset(ps->zero_area, 0, len);
-
-	return 0;
-}
-
-static void free_area(struct pstore *ps)
-{
-	vfree(ps->area);
-	ps->area = NULL;
-	vfree(ps->zero_area);
-	ps->zero_area = NULL;
-}
-
-struct mdata_req {
-	struct dm_io_region *where;
-	struct dm_io_request *io_req;
-	struct work_struct work;
-	int result;
-};
-
-static void do_metadata(struct work_struct *work)
-{
-	struct mdata_req *req = container_of(work, struct mdata_req, work);
-
-	req->result = dm_io(req->io_req, 1, req->where, NULL);
-}
-
-/*
- * Read or write a chunk aligned and sized block of data from a device.
- */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = rw,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-	struct mdata_req req;
-
-	if (!metadata)
-		return dm_io(&io_req, 1, &where, NULL);
-
-	req.where = &where;
-	req.io_req = &io_req;
-
-	/*
-	 * Issue the synchronous I/O from a different thread
-	 * to avoid generic_make_request recursion.
-	 */
-	INIT_WORK(&req.work, do_metadata);
-	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
-
-	return req.result;
-}
-
-/*
- * Convert a metadata area index to a chunk index.
- */
-static chunk_t area_location(struct pstore *ps, chunk_t area)
-{
-	return 1 + ((ps->exceptions_per_area + 1) * area);
-}
-
-/*
- * Read or write a metadata area. Remembering to skip the first
- * chunk which holds the header.
- */
-static int area_io(struct pstore *ps, int rw)
-{
-	int r;
-	chunk_t chunk;
-
-	chunk = area_location(ps, ps->current_area);
-
-	r = chunk_io(ps, chunk, rw, 0);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-static void zero_memory_area(struct pstore *ps)
-{
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-}
-
-static int zero_disk_area(struct pstore *ps, chunk_t area)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
-}
-
-static int read_header(struct pstore *ps, int *new_snapshot)
-{
-	int r;
-	struct disk_header *dh;
-	chunk_t chunk_size;
-	int chunk_size_supplied = 1;
-
-	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
-	 */
-	if (!ps->snap->chunk_size) {
-		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
-		chunk_size_supplied = 0;
-	}
-
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
-							     chunk_size));
-	if (IS_ERR(ps->io_client))
-		return PTR_ERR(ps->io_client);
-
-	r = alloc_area(ps);
-	if (r)
-		return r;
-
-	r = chunk_io(ps, 0, READ, 1);
-	if (r)
-		goto bad;
-
-	dh = (struct disk_header *) ps->area;
-
-	if (le32_to_cpu(dh->magic) == 0) {
-		*new_snapshot = 1;
-		return 0;
-	}
-
-	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
-		DMWARN("Invalid or corrupt snapshot");
-		r = -ENXIO;
-		goto bad;
-	}
-
-	*new_snapshot = 0;
-	ps->valid = le32_to_cpu(dh->valid);
-	ps->version = le32_to_cpu(dh->version);
-	chunk_size = le32_to_cpu(dh->chunk_size);
-
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
-		return 0;
-
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
-
-	/* We had a bogus chunk_size. Fix stuff up. */
-	free_area(ps);
-
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
-
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
-	r = alloc_area(ps);
-	return r;
-
-bad:
-	free_area(ps);
-	return r;
-}
-
-static int write_header(struct pstore *ps)
-{
-	struct disk_header *dh;
-
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-
-	dh = (struct disk_header *) ps->area;
-	dh->magic = cpu_to_le32(SNAP_MAGIC);
-	dh->valid = cpu_to_le32(ps->valid);
-	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
-
-	return chunk_io(ps, 0, WRITE, 1);
-}
-
-/*
- * Access functions for the disk exceptions, these do the endian conversions.
- */
-static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
-{
-	BUG_ON(index >= ps->exceptions_per_area);
-
-	return ((struct disk_exception *) ps->area) + index;
-}
+#include <linux/device-mapper.h>
 
-static void read_exception(struct pstore *ps,
-			   uint32_t index, struct disk_exception *result)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	result->old_chunk = le64_to_cpu(e->old_chunk);
-	result->new_chunk = le64_to_cpu(e->new_chunk);
-}
-
-static void write_exception(struct pstore *ps,
-			    uint32_t index, struct disk_exception *de)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	e->old_chunk = cpu_to_le64(de->old_chunk);
-	e->new_chunk = cpu_to_le64(de->new_chunk);
-}
+#define DM_MSG_PREFIX "snapshot exception stores"
 
-/*
- * Registers the exceptions that are present in the current area.
- * 'full' is filled in to indicate if the area has been
- * filled.
- */
-static int insert_exceptions(struct pstore *ps, int *full)
+int dm_exception_store_init(void)
 {
 	int r;
-	unsigned int i;
-	struct disk_exception de;
-
-	/* presume the area is full */
-	*full = 1;
-
-	for (i = 0; i < ps->exceptions_per_area; i++) {
-		read_exception(ps, i, &de);
-
-		/*
-		 * If the new_chunk is pointing at the start of
-		 * the COW device, where the first metadata area
-		 * is we know that we've hit the end of the
-		 * exceptions. Therefore the area is not full.
-		 */
-		if (de.new_chunk == 0LL) {
-			ps->current_committed = i;
-			*full = 0;
-			break;
-		}
-
-		/*
-		 * Keep track of the start of the free chunks.
-		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
-
-		/*
-		 * Otherwise we add the exception to the snapshot.
-		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-static int read_exceptions(struct pstore *ps)
-{
-	int r, full = 1;
-
-	/*
-	 * Keeping reading chunks and inserting exceptions until
-	 * we find a partially full area.
-	 */
-	for (ps->current_area = 0; full; ps->current_area++) {
-		r = area_io(ps, READ);
-		if (r)
-			return r;
 
-		r = insert_exceptions(ps, &full);
-		if (r)
-			return r;
+	r = dm_transient_snapshot_init();
+	if (r) {
+		DMERR("Unable to register transient exception store type.");
+		goto transient_fail;
 	}
 
-	ps->current_area--;
-
-	return 0;
-}
-
-static struct pstore *get_info(struct dm_exception_store *store)
-{
-	return (struct pstore *) store->context;
-}
-
-static void persistent_fraction_full(struct dm_exception_store *store,
-				     sector_t *numerator, sector_t *denominator)
-{
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
-
-static void persistent_destroy(struct dm_exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
-	free_area(ps);
-	kfree(ps);
-}
-
-static int persistent_read_metadata(struct dm_exception_store *store)
-{
-	int r, uninitialized_var(new_snapshot);
-	struct pstore *ps = get_info(store);
-
-	/*
-	 * Read the snapshot header.
-	 */
-	r = read_header(ps, &new_snapshot);
-	if (r)
-		return r;
-
-	/*
-	 * Now we know correct chunk_size, complete the initialisation.
-	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
-				  sizeof(struct disk_exception);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-				   sizeof(*ps->callbacks));
-	if (!ps->callbacks)
-		return -ENOMEM;
-
-	/*
-	 * Do we need to setup a new snapshot ?
-	 */
-	if (new_snapshot) {
-		r = write_header(ps);
-		if (r) {
-			DMWARN("write_header failed");
-			return r;
-		}
-
-		ps->current_area = 0;
-		zero_memory_area(ps);
-		r = zero_disk_area(ps, 0);
-		if (r) {
-			DMWARN("zero_disk_area(0) failed");
-			return r;
-		}
-	} else {
-		/*
-		 * Sanity checks.
-		 */
-		if (ps->version != SNAPSHOT_DISK_VERSION) {
-			DMWARN("unable to handle snapshot disk version %d",
-			       ps->version);
-			return -EINVAL;
-		}
-
-		/*
-		 * Metadata are valid, but snapshot is invalidated
-		 */
-		if (!ps->valid)
-			return 1;
-
-		/*
-		 * Read the metadata.
-		 */
-		r = read_exceptions(ps);
-		if (r)
-			return r;
+	r = dm_persistent_snapshot_init();
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		goto persistent_fail;
 	}
 
 	return 0;
-}
-
-static int persistent_prepare(struct dm_exception_store *store,
-			      struct dm_snap_exception *e)
-{
-	struct pstore *ps = get_info(store);
-	uint32_t stride;
-	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
-		return -ENOSPC;
 
-	e->new_chunk = ps->next_free;
-
-	/*
-	 * Move onto the next free pending, making sure to take
-	 * into account the location of the metadata chunks.
-	 */
-	stride = (ps->exceptions_per_area + 1);
-	next_free = ++ps->next_free;
-	if (sector_div(next_free, stride) == 1)
-		ps->next_free++;
-
-	atomic_inc(&ps->pending_count);
-	return 0;
-}
-
-static void persistent_commit(struct dm_exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
-{
-	unsigned int i;
-	struct pstore *ps = get_info(store);
-	struct disk_exception de;
-	struct commit_callback *cb;
-
-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
-
-	/*
-	 * Add the callback to the back of the array. This code
-	 * is the only place where the callback array is
-	 * manipulated, and we know that it will never be called
-	 * multiple times concurrently.
-	 */
-	cb = ps->callbacks + ps->callback_count++;
-	cb->callback = callback;
-	cb->context = callback_context;
-
-	/*
-	 * If there are exceptions in flight and we have not yet
-	 * filled this metadata area there's nothing more to do.
-	 */
-	if (!atomic_dec_and_test(&ps->pending_count) &&
-	    (ps->current_committed != ps->exceptions_per_area))
-		return;
-
-	/*
-	 * If we completely filled the current area, then wipe the next one.
-	 */
-	if ((ps->current_committed == ps->exceptions_per_area) &&
-	    zero_disk_area(ps, ps->current_area + 1))
-		ps->valid = 0;
-
-	/*
-	 * Commit exceptions to disk.
-	 */
-	if (ps->valid && area_io(ps, WRITE))
-		ps->valid = 0;
-
-	/*
-	 * Advance to the next area if this one is full.
-	 */
-	if (ps->current_committed == ps->exceptions_per_area) {
-		ps->current_committed = 0;
-		ps->current_area++;
-		zero_memory_area(ps);
-	}
-
-	for (i = 0; i < ps->callback_count; i++) {
-		cb = ps->callbacks + i;
-		cb->callback(cb->context, ps->valid);
-	}
-
-	ps->callback_count = 0;
-}
-
-static void persistent_drop(struct dm_exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	ps->valid = 0;
-	if (write_header(ps))
-		DMWARN("write header failed");
-}
-
-int dm_create_persistent(struct dm_exception_store *store)
-{
-	struct pstore *ps;
-
-	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		return -ENOMEM;
-
-	ps->snap = store->snap;
-	ps->valid = 1;
-	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->area = NULL;
-	ps->next_free = 2;	/* skipping the header and first area */
-	ps->current_committed = 0;
-
-	ps->callback_count = 0;
-	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = NULL;
-
-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
-	if (!ps->metadata_wq) {
-		kfree(ps);
-		DMERR("couldn't start header metadata update thread");
-		return -ENOMEM;
-	}
-
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
-	store->fraction_full = persistent_fraction_full;
-	store->context = ps;
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the store for non-persistent snapshots.
- *---------------------------------------------------------------*/
-struct transient_c {
-	sector_t next_free;
-};
-
-static void transient_destroy(struct dm_exception_store *store)
-{
-	kfree(store->context);
-}
-
-static int transient_read_metadata(struct dm_exception_store *store)
-{
-	return 0;
-}
-
-static int transient_prepare(struct dm_exception_store *store,
-			     struct dm_snap_exception *e)
-{
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	if (size < (tc->next_free + store->snap->chunk_size))
-		return -1;
-
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
-
-	return 0;
-}
-
-static void transient_commit(struct dm_exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
-{
-	/* Just succeed */
-	callback(callback_context, 1);
-}
-
-static void transient_fraction_full(struct dm_exception_store *store,
-				    sector_t *numerator, sector_t *denominator)
-{
-	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+persistent_fail:
+	dm_persistent_snapshot_exit();
+transient_fail:
+	return r;
 }
 
-int dm_create_transient(struct dm_exception_store *store)
+void dm_exception_store_exit(void)
 {
-	struct transient_c *tc;
-
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
-	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->next_free = 0;
-	store->context = tc;
-
-	return 0;
+	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
 }
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 25677df8dd59..78d1acec77e9 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -122,9 +122,18 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 
 # endif
 
+int dm_exception_store_init(void);
+void dm_exception_store_exit(void);
+
 /*
  * Two exception store implementations.
  */
+int dm_persistent_snapshot_init(void);
+void dm_persistent_snapshot_exit(void);
+
+int dm_transient_snapshot_init(void);
+void dm_transient_snapshot_exit(void);
+
 int dm_create_persistent(struct dm_exception_store *store);
 
 int dm_create_transient(struct dm_exception_store *store);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
new file mode 100644
index 000000000000..57c946c69ee7
--- /dev/null
+++ b/drivers/md/dm-snap-persistent.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006-2008 Red Hat GmbH
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-exception-store.h"
+#include "dm-snap.h"
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/dm-io.h>
+
+#define DM_MSG_PREFIX "persistent snapshot"
+#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
+
+/*-----------------------------------------------------------------
+ * Persistent snapshots, by persistent we mean that the snapshot
+ * will survive a reboot.
+ *---------------------------------------------------------------*/
+
+/*
+ * We need to store a record of which parts of the origin have
+ * been copied to the snapshot device. The snapshot code
+ * requires that we copy exception chunks to chunk aligned areas
+ * of the COW store. It makes sense therefore, to store the
+ * metadata in chunk size blocks.
+ *
+ * There is no backward or forward compatibility implemented,
+ * snapshots with different disk versions than the kernel will
+ * not be usable. It is expected that "lvcreate" will blank out
+ * the start of a fresh COW device before calling the snapshot
+ * constructor.
+ *
+ * The first chunk of the COW device just contains the header.
+ * After this there is a chunk filled with exception metadata,
+ * followed by as many exception chunks as can fit in the
+ * metadata areas.
+ *
+ * All on disk structures are in little-endian format. The end
+ * of the exceptions info is indicated by an exception with a
+ * new_chunk of 0, which is invalid since it would point to the
+ * header chunk.
+ */
+
+/*
+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
+ */
+#define SNAP_MAGIC 0x70416e53
+
+/*
+ * The on-disk version of the metadata.
+ */
+#define SNAPSHOT_DISK_VERSION 1
+
+struct disk_header {
+	uint32_t magic;
+
+	/*
+	 * Is this snapshot valid. There is no way of recovering
+	 * an invalid snapshot.
+	 */
+	uint32_t valid;
+
+	/*
+	 * Simple, incrementing version. no backward
+	 * compatibility.
+	 */
+	uint32_t version;
+
+	/* In sectors */
+	uint32_t chunk_size;
+};
+
+struct disk_exception {
+	uint64_t old_chunk;
+	uint64_t new_chunk;
+};
+
+struct commit_callback {
+	void (*callback)(void *, int success);
+	void *context;
+};
+
+/*
+ * The top level structure for a persistent exception store.
+ */
+struct pstore {
+	struct dm_snapshot *snap;	/* up pointer to my snapshot */
+	int version;
+	int valid;
+	uint32_t exceptions_per_area;
+
+	/*
+	 * Now that we have an asynchronous kcopyd there is no
+	 * need for large chunk sizes, so it wont hurt to have a
+	 * whole chunks worth of metadata in memory at once.
+	 */
+	void *area;
+
+	/*
+	 * An area of zeros used to clear the next area.
+	 */
+	void *zero_area;
+
+	/*
+	 * Used to keep track of which metadata area the data in
+	 * 'chunk' refers to.
+	 */
+	chunk_t current_area;
+
+	/*
+	 * The next free chunk for an exception.
+	 */
+	chunk_t next_free;
+
+	/*
+	 * The index of next free exception in the current
+	 * metadata area.
+	 */
+	uint32_t current_committed;
+
+	atomic_t pending_count;
+	uint32_t callback_count;
+	struct commit_callback *callbacks;
+	struct dm_io_client *io_client;
+
+	struct workqueue_struct *metadata_wq;
+};
+
+static unsigned sectors_to_pages(unsigned sectors)
+{
+	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
+}
+
+static int alloc_area(struct pstore *ps)
+{
+	int r = -ENOMEM;
+	size_t len;
+
+	len = ps->snap->chunk_size << SECTOR_SHIFT;
+
+	/*
+	 * Allocate the chunk_size block of memory that will hold
+	 * a single metadata area.
+	 */
+	ps->area = vmalloc(len);
+	if (!ps->area)
+		return r;
+
+	ps->zero_area = vmalloc(len);
+	if (!ps->zero_area) {
+		vfree(ps->area);
+		return r;
+	}
+	memset(ps->zero_area, 0, len);
+
+	return 0;
+}
+
+static void free_area(struct pstore *ps)
+{
+	vfree(ps->area);
+	ps->area = NULL;
+	vfree(ps->zero_area);
+	ps->zero_area = NULL;
+}
+
+struct mdata_req {
+	struct dm_io_region *where;
+	struct dm_io_request *io_req;
+	struct work_struct work;
+	int result;
+};
+
+static void do_metadata(struct work_struct *work)
+{
+	struct mdata_req *req = container_of(work, struct mdata_req, work);
+
+	req->result = dm_io(req->io_req, 1, req->where, NULL);
+}
+
+/*
+ * Read or write a chunk aligned and sized block of data from a device.
+ */
+static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * chunk,
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = rw,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+	struct mdata_req req;
+
+	if (!metadata)
+		return dm_io(&io_req, 1, &where, NULL);
+
+	req.where = &where;
+	req.io_req = &io_req;
+
+	/*
+	 * Issue the synchronous I/O from a different thread
+	 * to avoid generic_make_request recursion.
+	 */
+	INIT_WORK(&req.work, do_metadata);
+	queue_work(ps->metadata_wq, &req.work);
+	flush_workqueue(ps->metadata_wq);
+
+	return req.result;
+}
+
+/*
+ * Convert a metadata area index to a chunk index.
+ */
+static chunk_t area_location(struct pstore *ps, chunk_t area)
+{
+	return 1 + ((ps->exceptions_per_area + 1) * area);
+}
+
+/*
+ * Read or write a metadata area. Remembering to skip the first
+ * chunk which holds the header.
+ */
+static int area_io(struct pstore *ps, int rw)
+{
+	int r;
+	chunk_t chunk;
+
+	chunk = area_location(ps, ps->current_area);
+
+	r = chunk_io(ps, chunk, rw, 0);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+static void zero_memory_area(struct pstore *ps)
+{
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+}
+
+static int zero_disk_area(struct pstore *ps, chunk_t area)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * area_location(ps, area),
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = WRITE,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->zero_area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+
+	return dm_io(&io_req, 1, &where, NULL);
+}
+
+static int read_header(struct pstore *ps, int *new_snapshot)
+{
+	int r;
+	struct disk_header *dh;
+	chunk_t chunk_size;
+	int chunk_size_supplied = 1;
+
+	/*
+	 * Use default chunk size (or hardsect_size, if larger) if none supplied
+	 */
+	if (!ps->snap->chunk_size) {
+		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
+		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
+		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
+		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
+		chunk_size_supplied = 0;
+	}
+
+	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+							     chunk_size));
+	if (IS_ERR(ps->io_client))
+		return PTR_ERR(ps->io_client);
+
+	r = alloc_area(ps);
+	if (r)
+		return r;
+
+	r = chunk_io(ps, 0, READ, 1);
+	if (r)
+		goto bad;
+
+	dh = (struct disk_header *) ps->area;
+
+	if (le32_to_cpu(dh->magic) == 0) {
+		*new_snapshot = 1;
+		return 0;
+	}
+
+	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
+		DMWARN("Invalid or corrupt snapshot");
+		r = -ENXIO;
+		goto bad;
+	}
+
+	*new_snapshot = 0;
+	ps->valid = le32_to_cpu(dh->valid);
+	ps->version = le32_to_cpu(dh->version);
+	chunk_size = le32_to_cpu(dh->chunk_size);
+
+	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
+		return 0;
+
+	DMWARN("chunk size %llu in device metadata overrides "
+	       "table chunk size of %llu.",
+	       (unsigned long long)chunk_size,
+	       (unsigned long long)ps->snap->chunk_size);
+
+	/* We had a bogus chunk_size. Fix stuff up. */
+	free_area(ps);
+
+	ps->snap->chunk_size = chunk_size;
+	ps->snap->chunk_mask = chunk_size - 1;
+	ps->snap->chunk_shift = ffs(chunk_size) - 1;
+
+	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+				ps->io_client);
+	if (r)
+		return r;
+
+	r = alloc_area(ps);
+	return r;
+
+bad:
+	free_area(ps);
+	return r;
+}
+
+static int write_header(struct pstore *ps)
+{
+	struct disk_header *dh;
+
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+
+	dh = (struct disk_header *) ps->area;
+	dh->magic = cpu_to_le32(SNAP_MAGIC);
+	dh->valid = cpu_to_le32(ps->valid);
+	dh->version = cpu_to_le32(ps->version);
+	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
+
+	return chunk_io(ps, 0, WRITE, 1);
+}
+
+/*
+ * Access functions for the disk exceptions, these do the endian conversions.
+ */
+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
+{
+	BUG_ON(index >= ps->exceptions_per_area);
+
+	return ((struct disk_exception *) ps->area) + index;
+}
+
+static void read_exception(struct pstore *ps,
+			   uint32_t index, struct disk_exception *result)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* copy it */
+	result->old_chunk = le64_to_cpu(e->old_chunk);
+	result->new_chunk = le64_to_cpu(e->new_chunk);
+}
+
+static void write_exception(struct pstore *ps,
+			    uint32_t index, struct disk_exception *de)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* copy it */
+	e->old_chunk = cpu_to_le64(de->old_chunk);
+	e->new_chunk = cpu_to_le64(de->new_chunk);
+}
+
+/*
+ * Registers the exceptions that are present in the current area.
+ * 'full' is filled in to indicate if the area has been
+ * filled.
+ */
+static int insert_exceptions(struct pstore *ps, int *full)
+{
+	int r;
+	unsigned int i;
+	struct disk_exception de;
+
+	/* presume the area is full */
+	*full = 1;
+
+	for (i = 0; i < ps->exceptions_per_area; i++) {
+		read_exception(ps, i, &de);
+
+		/*
+		 * If the new_chunk is pointing at the start of
+		 * the COW device, where the first metadata area
+		 * is we know that we've hit the end of the
+		 * exceptions. Therefore the area is not full.
+		 */
+		if (de.new_chunk == 0LL) {
+			ps->current_committed = i;
+			*full = 0;
+			break;
+		}
+
+		/*
+		 * Keep track of the start of the free chunks.
+		 */
+		if (ps->next_free <= de.new_chunk)
+			ps->next_free = de.new_chunk + 1;
+
+		/*
+		 * Otherwise we add the exception to the snapshot.
+		 */
+		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int read_exceptions(struct pstore *ps)
+{
+	int r, full = 1;
+
+	/*
+	 * Keeping reading chunks and inserting exceptions until
+	 * we find a partially full area.
+	 */
+	for (ps->current_area = 0; full; ps->current_area++) {
+		r = area_io(ps, READ);
+		if (r)
+			return r;
+
+		r = insert_exceptions(ps, &full);
+		if (r)
+			return r;
+	}
+
+	ps->current_area--;
+
+	return 0;
+}
+
+static struct pstore *get_info(struct dm_exception_store *store)
+{
+	return (struct pstore *) store->context;
+}
+
+static void persistent_fraction_full(struct dm_exception_store *store,
+				     sector_t *numerator, sector_t *denominator)
+{
+	*numerator = get_info(store)->next_free * store->snap->chunk_size;
+	*denominator = get_dev_size(store->snap->cow->bdev);
+}
+
+static void persistent_destroy(struct dm_exception_store *store)
+{
+	struct pstore *ps = get_info(store);
+
+	destroy_workqueue(ps->metadata_wq);
+	dm_io_client_destroy(ps->io_client);
+	vfree(ps->callbacks);
+	free_area(ps);
+	kfree(ps);
+}
+
+static int persistent_read_metadata(struct dm_exception_store *store)
+{
+	int r, uninitialized_var(new_snapshot);
+	struct pstore *ps = get_info(store);
+
+	/*
+	 * Read the snapshot header.
+	 */
+	r = read_header(ps, &new_snapshot);
+	if (r)
+		return r;
+
+	/*
+	 * Now we know correct chunk_size, complete the initialisation.
+	 */
+	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+				  sizeof(struct disk_exception);
+	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
+				   sizeof(*ps->callbacks));
+	if (!ps->callbacks)
+		return -ENOMEM;
+
+	/*
+	 * Do we need to setup a new snapshot ?
+	 */
+	if (new_snapshot) {
+		r = write_header(ps);
+		if (r) {
+			DMWARN("write_header failed");
+			return r;
+		}
+
+		ps->current_area = 0;
+		zero_memory_area(ps);
+		r = zero_disk_area(ps, 0);
+		if (r) {
+			DMWARN("zero_disk_area(0) failed");
+			return r;
+		}
+	} else {
+		/*
+		 * Sanity checks.
+		 */
+		if (ps->version != SNAPSHOT_DISK_VERSION) {
+			DMWARN("unable to handle snapshot disk version %d",
+			       ps->version);
+			return -EINVAL;
+		}
+
+		/*
+		 * Metadata are valid, but snapshot is invalidated
+		 */
+		if (!ps->valid)
+			return 1;
+
+		/*
+		 * Read the metadata.
+		 */
+		r = read_exceptions(ps);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int persistent_prepare(struct dm_exception_store *store,
+			      struct dm_snap_exception *e)
+{
+	struct pstore *ps = get_info(store);
+	uint32_t stride;
+	chunk_t next_free;
+	sector_t size = get_dev_size(store->snap->cow->bdev);
+
+	/* Is there enough room ? */
+	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+		return -ENOSPC;
+
+	e->new_chunk = ps->next_free;
+
+	/*
+	 * Move onto the next free pending, making sure to take
+	 * into account the location of the metadata chunks.
+	 */
+	stride = (ps->exceptions_per_area + 1);
+	next_free = ++ps->next_free;
+	if (sector_div(next_free, stride) == 1)
+		ps->next_free++;
+
+	atomic_inc(&ps->pending_count);
+	return 0;
+}
+
+static void persistent_commit(struct dm_exception_store *store,
+			      struct dm_snap_exception *e,
+			      void (*callback) (void *, int success),
+			      void *callback_context)
+{
+	unsigned int i;
+	struct pstore *ps = get_info(store);
+	struct disk_exception de;
+	struct commit_callback *cb;
+
+	de.old_chunk = e->old_chunk;
+	de.new_chunk = e->new_chunk;
+	write_exception(ps, ps->current_committed++, &de);
+
+	/*
+	 * Add the callback to the back of the array. This code
+	 * is the only place where the callback array is
+	 * manipulated, and we know that it will never be called
+	 * multiple times concurrently.
+	 */
+	cb = ps->callbacks + ps->callback_count++;
+	cb->callback = callback;
+	cb->context = callback_context;
+
+	/*
+	 * If there are exceptions in flight and we have not yet
+	 * filled this metadata area there's nothing more to do.
+	 */
+	if (!atomic_dec_and_test(&ps->pending_count) &&
+	    (ps->current_committed != ps->exceptions_per_area))
+		return;
+
+	/*
+	 * If we completely filled the current area, then wipe the next one.
+	 */
+	if ((ps->current_committed == ps->exceptions_per_area) &&
+	    zero_disk_area(ps, ps->current_area + 1))
+		ps->valid = 0;
+
+	/*
+	 * Commit exceptions to disk.
+	 */
+	if (ps->valid && area_io(ps, WRITE))
+		ps->valid = 0;
+
+	/*
+	 * Advance to the next area if this one is full.
+	 */
+	if (ps->current_committed == ps->exceptions_per_area) {
+		ps->current_committed = 0;
+		ps->current_area++;
+		zero_memory_area(ps);
+	}
+
+	for (i = 0; i < ps->callback_count; i++) {
+		cb = ps->callbacks + i;
+		cb->callback(cb->context, ps->valid);
+	}
+
+	ps->callback_count = 0;
+}
+
+static void persistent_drop(struct dm_exception_store *store)
+{
+	struct pstore *ps = get_info(store);
+
+	ps->valid = 0;
+	if (write_header(ps))
+		DMWARN("write header failed");
+}
+
+int dm_create_persistent(struct dm_exception_store *store)
+{
+	struct pstore *ps;
+
+	/* allocate the pstore */
+	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	ps->snap = store->snap;
+	ps->valid = 1;
+	ps->version = SNAPSHOT_DISK_VERSION;
+	ps->area = NULL;
+	ps->next_free = 2;	/* skipping the header and first area */
+	ps->current_committed = 0;
+
+	ps->callback_count = 0;
+	atomic_set(&ps->pending_count, 0);
+	ps->callbacks = NULL;
+
+	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
+	if (!ps->metadata_wq) {
+		kfree(ps);
+		DMERR("couldn't start header metadata update thread");
+		return -ENOMEM;
+	}
+
+	store->destroy = persistent_destroy;
+	store->read_metadata = persistent_read_metadata;
+	store->prepare_exception = persistent_prepare;
+	store->commit_exception = persistent_commit;
+	store->drop_snapshot = persistent_drop;
+	store->fraction_full = persistent_fraction_full;
+	store->context = ps;
+
+	return 0;
+}
+
+int dm_persistent_snapshot_init(void)
+{
+	return 0;
+}
+
+void dm_persistent_snapshot_exit(void)
+{
+}
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
new file mode 100644
index 000000000000..2a781df57fef
--- /dev/null
+++ b/drivers/md/dm-snap-transient.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006-2008 Red Hat GmbH
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-exception-store.h"
+#include "dm-snap.h"
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/dm-io.h>
+
+#define DM_MSG_PREFIX "transient snapshot"
+
+/*-----------------------------------------------------------------
+ * Implementation of the store for non-persistent snapshots.
+ *---------------------------------------------------------------*/
+struct transient_c {
+	sector_t next_free;
+};
+
+static void transient_destroy(struct dm_exception_store *store)
+{
+	kfree(store->context);
+}
+
+static int transient_read_metadata(struct dm_exception_store *store)
+{
+	return 0;
+}
+
+static int transient_prepare(struct dm_exception_store *store,
+			     struct dm_snap_exception *e)
+{
+	struct transient_c *tc = (struct transient_c *) store->context;
+	sector_t size = get_dev_size(store->snap->cow->bdev);
+
+	if (size < (tc->next_free + store->snap->chunk_size))
+		return -1;
+
+	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
+	tc->next_free += store->snap->chunk_size;
+
+	return 0;
+}
+
+static void transient_commit(struct dm_exception_store *store,
+			     struct dm_snap_exception *e,
+			     void (*callback) (void *, int success),
+			     void *callback_context)
+{
+	/* Just succeed */
+	callback(callback_context, 1);
+}
+
+static void transient_fraction_full(struct dm_exception_store *store,
+				    sector_t *numerator, sector_t *denominator)
+{
+	*numerator = ((struct transient_c *) store->context)->next_free;
+	*denominator = get_dev_size(store->snap->cow->bdev);
+}
+
+int dm_create_transient(struct dm_exception_store *store)
+{
+	struct transient_c *tc;
+
+	store->destroy = transient_destroy;
+	store->read_metadata = transient_read_metadata;
+	store->prepare_exception = transient_prepare;
+	store->commit_exception = transient_commit;
+	store->drop_snapshot = NULL;
+	store->fraction_full = transient_fraction_full;
+
+	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->next_free = 0;
+	store->context = tc;
+
+	return 0;
+}
+
+int dm_transient_snapshot_init(void)
+{
+	return 0;
+}
+
+void dm_transient_snapshot_exit(void)
+{
+}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 81f03a0e7838..018b567fc758 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1406,6 +1406,12 @@ static int __init dm_snapshot_init(void)
 {
 	int r;
 
+	r = dm_exception_store_init();
+	if (r) {
+		DMERR("Failed to initialize exception stores");
+		return r;
+	}
+
 	r = dm_register_target(&snapshot_target);
 	if (r) {
 		DMERR("snapshot target register failed %d", r);
@@ -1454,17 +1460,17 @@ static int __init dm_snapshot_init(void)
 
 	return 0;
 
-      bad_pending_pool:
+bad_pending_pool:
 	kmem_cache_destroy(tracked_chunk_cache);
-      bad5:
+bad5:
 	kmem_cache_destroy(pending_cache);
-      bad4:
+bad4:
 	kmem_cache_destroy(exception_cache);
-      bad3:
+bad3:
 	exit_origin_hash();
-      bad2:
+bad2:
 	dm_unregister_target(&origin_target);
-      bad1:
+bad1:
 	dm_unregister_target(&snapshot_target);
 	return r;
 }
@@ -1480,6 +1486,8 @@ static void __exit dm_snapshot_exit(void)
 	kmem_cache_destroy(pending_cache);
 	kmem_cache_destroy(exception_cache);
 	kmem_cache_destroy(tracked_chunk_cache);
+
+	dm_exception_store_exit();
 }
 
 /* Module hooks */