author		Linus Torvalds <torvalds@linux-foundation.org>	2009-01-05 22:20:59 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-05 22:20:59 -0500
commit		238c6d54830c624f34ac9cf123ac04aebfca5013 (patch)
tree		43b7f595013483382a3053237c45d9d2824e0295
parent		8e128ce3318a147903c893de1891f6c2306f8a61 (diff)
parent		a159c1ac5f33c6cf0f5aa3c9d1ccdc82c907ee46 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
dm snapshot: extend exception store functions
dm snapshot: split out exception store implementations
dm snapshot: rename struct exception_store
dm snapshot: separate out exception store interface
dm mpath: move trigger_event to system workqueue
dm: add name and uuid to sysfs
dm table: rework reference counting
dm: support barriers on simple devices
dm request: extend target interface
dm request: add caches
dm ioctl: allow dm_copy_name_and_uuid to return only one field
dm log: ensure log bitmap fits on log device
dm log: move region_size validation
dm log: avoid reinitialising io_req on every operation
dm: consolidate target deregistration error handling
dm raid1: fix error count
dm log: fix dm_io_client leak on error paths
dm snapshot: change yield to msleep
dm table: drop reference at unbind
-rw-r--r--	drivers/md/Makefile			  5
-rw-r--r--	drivers/md/dm-crypt.c			  6
-rw-r--r--	drivers/md/dm-delay.c			  6
-rw-r--r--	drivers/md/dm-exception-store.c		749
-rw-r--r--	drivers/md/dm-exception-store.h		148
-rw-r--r--	drivers/md/dm-ioctl.c			 16
-rw-r--r--	drivers/md/dm-linear.c			  6
-rw-r--r--	drivers/md/dm-log.c			 40
-rw-r--r--	drivers/md/dm-mpath.c			 14
-rw-r--r--	drivers/md/dm-raid1.c			 24
-rw-r--r--	drivers/md/dm-snap-persistent.c		704
-rw-r--r--	drivers/md/dm-snap-transient.c		 98
-rw-r--r--	drivers/md/dm-snap.c			 48
-rw-r--r--	drivers/md/dm-snap.h			129
-rw-r--r--	drivers/md/dm-stripe.c			  4
-rw-r--r--	drivers/md/dm-sysfs.c			 99
-rw-r--r--	drivers/md/dm-table.c			 47
-rw-r--r--	drivers/md/dm-target.c			 15
-rw-r--r--	drivers/md/dm-zero.c			  5
-rw-r--r--	drivers/md/dm.c				101
-rw-r--r--	drivers/md/dm.h				 10
-rw-r--r--	include/linux/device-mapper.h		 28
22 files changed, 1319 insertions, 983 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 1c615804ea76..72880b7e28d9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,9 +3,10 @@
 #
 
 dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		dm-ioctl.o dm-io.o dm-kcopyd.o
+		dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+		    dm-snap-persistent.o
 dm-mirror-objs := dm-raid1.o
 md-mod-objs := md.o bitmap.o
 raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3326750ec02c..35bda49796fb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
 
 static void __exit dm_crypt_exit(void)
 {
-	int r = dm_unregister_target(&crypt_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
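The dm-crypt hunk above is the first instance of a pattern repeated in dm-delay, dm-linear, dm-mpath and dm-raid1 below: "dm: consolidate target deregistration error handling" makes dm_unregister_target() return void, so module exit paths drop their error checks. A minimal sketch of the resulting exit shape for a hypothetical out-of-tree target (the names my_target and my_cache are illustrative, not from this merge):

/*
 * Sketch only: module exit for a dm target after this series.
 * dm_unregister_target() no longer returns an error, so teardown
 * is straight-line.  "my_target" and "my_cache" are made-up names.
 */
static void __exit my_target_exit(void)
{
	dm_unregister_target(&my_target);	/* void since this series */
	kmem_cache_destroy(my_cache);		/* then free module resources */
}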
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 848b381f1173..59ee1b015d2d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -364,11 +364,7 @@ bad_queue:
 
 static void __exit dm_delay_exit(void)
 {
-	int r = dm_unregister_target(&delay_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&delay_target);
 	kmem_cache_destroy(delayed_cache);
 	destroy_workqueue(kdelayd_wq);
 }
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 01590f3e0009..dccbfb0e010f 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,756 +1,45 @@
 /*
- * dm-exception-store.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- * Copyright (C) 2006 Red Hat GmbH
+ * Copyright (C) 2006-2008 Red Hat GmbH
  *
  * This file is released under the GPL.
  */
 
-#include "dm-snap.h"
+#include "dm-exception-store.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
-
-#define DM_MSG_PREFIX "snapshots"
-#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
-
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
-
-/*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device.  The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store.  It makes sense therefore, to store the
- * metadata in chunk size blocks.
- *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable.  It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
- * constructor.
- *
- * The first chunk of the COW device just contains the header.
- * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
- *
- * All on disk structures are in little-endian format.  The end
- * of the exceptions info is indicated by an exception with a
- * new_chunk of 0, which is invalid since it would point to the
- * header chunk.
- */
-
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata.
- */
-#define SNAPSHOT_DISK_VERSION 1
-
-struct disk_header {
-	uint32_t magic;
-
-	/*
-	 * Is this snapshot valid.  There is no way of recovering
-	 * an invalid snapshot.
-	 */
-	uint32_t valid;
-
-	/*
-	 * Simple, incrementing version. no backward
-	 * compatibility.
-	 */
-	uint32_t version;
-
-	/* In sectors */
-	uint32_t chunk_size;
-};
-
-struct disk_exception {
-	uint64_t old_chunk;
-	uint64_t new_chunk;
-};
-
-struct commit_callback {
-	void (*callback)(void *, int success);
-	void *context;
-};
-
-/*
- * The top level structure for a persistent exception store.
- */
-struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
-	int version;
-	int valid;
-	uint32_t exceptions_per_area;
-
-	/*
-	 * Now that we have an asynchronous kcopyd there is no
-	 * need for large chunk sizes, so it wont hurt to have a
-	 * whole chunks worth of metadata in memory at once.
-	 */
-	void *area;
-
-	/*
-	 * An area of zeros used to clear the next area.
-	 */
-	void *zero_area;
-
-	/*
-	 * Used to keep track of which metadata area the data in
-	 * 'chunk' refers to.
-	 */
-	chunk_t current_area;
-
-	/*
-	 * The next free chunk for an exception.
-	 */
-	chunk_t next_free;
-
-	/*
-	 * The index of next free exception in the current
-	 * metadata area.
-	 */
-	uint32_t current_committed;
-
-	atomic_t pending_count;
-	uint32_t callback_count;
-	struct commit_callback *callbacks;
-	struct dm_io_client *io_client;
-
-	struct workqueue_struct *metadata_wq;
-};
-
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
-static int alloc_area(struct pstore *ps)
-{
-	int r = -ENOMEM;
-	size_t len;
-
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
-
-	/*
-	 * Allocate the chunk_size block of memory that will hold
-	 * a single metadata area.
-	 */
-	ps->area = vmalloc(len);
-	if (!ps->area)
-		return r;
-
-	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
-	memset(ps->zero_area, 0, len);
-
-	return 0;
-}
-
-static void free_area(struct pstore *ps)
-{
-	vfree(ps->area);
-	ps->area = NULL;
-	vfree(ps->zero_area);
-	ps->zero_area = NULL;
-}
-
-struct mdata_req {
-	struct dm_io_region *where;
-	struct dm_io_request *io_req;
-	struct work_struct work;
-	int result;
-};
-
-static void do_metadata(struct work_struct *work)
-{
-	struct mdata_req *req = container_of(work, struct mdata_req, work);
-
-	req->result = dm_io(req->io_req, 1, req->where, NULL);
-}
-
-/*
- * Read or write a chunk aligned and sized block of data from a device.
- */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = rw,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-	struct mdata_req req;
-
-	if (!metadata)
-		return dm_io(&io_req, 1, &where, NULL);
-
-	req.where = &where;
-	req.io_req = &io_req;
-
-	/*
-	 * Issue the synchronous I/O from a different thread
-	 * to avoid generic_make_request recursion.
-	 */
-	INIT_WORK(&req.work, do_metadata);
-	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
-
-	return req.result;
-}
-
-/*
- * Convert a metadata area index to a chunk index.
- */
-static chunk_t area_location(struct pstore *ps, chunk_t area)
-{
-	return 1 + ((ps->exceptions_per_area + 1) * area);
-}
-
-/*
- * Read or write a metadata area.  Remembering to skip the first
- * chunk which holds the header.
- */
-static int area_io(struct pstore *ps, int rw)
-{
-	int r;
-	chunk_t chunk;
-
-	chunk = area_location(ps, ps->current_area);
-
-	r = chunk_io(ps, chunk, rw, 0);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-static void zero_memory_area(struct pstore *ps)
-{
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-}
-
-static int zero_disk_area(struct pstore *ps, chunk_t area)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
-}
-
-static int read_header(struct pstore *ps, int *new_snapshot)
-{
-	int r;
-	struct disk_header *dh;
-	chunk_t chunk_size;
-	int chunk_size_supplied = 1;
-
-	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
-	 */
-	if (!ps->snap->chunk_size) {
-		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
-		chunk_size_supplied = 0;
-	}
-
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
-							     chunk_size));
-	if (IS_ERR(ps->io_client))
-		return PTR_ERR(ps->io_client);
-
-	r = alloc_area(ps);
-	if (r)
-		return r;
-
-	r = chunk_io(ps, 0, READ, 1);
-	if (r)
-		goto bad;
-
-	dh = (struct disk_header *) ps->area;
-
-	if (le32_to_cpu(dh->magic) == 0) {
-		*new_snapshot = 1;
-		return 0;
-	}
-
-	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
-		DMWARN("Invalid or corrupt snapshot");
-		r = -ENXIO;
-		goto bad;
-	}
-
-	*new_snapshot = 0;
-	ps->valid = le32_to_cpu(dh->valid);
-	ps->version = le32_to_cpu(dh->version);
-	chunk_size = le32_to_cpu(dh->chunk_size);
-
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
-		return 0;
-
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
-
-	/* We had a bogus chunk_size. Fix stuff up. */
-	free_area(ps);
-
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
-
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
-	r = alloc_area(ps);
-	return r;
-
-bad:
-	free_area(ps);
-	return r;
-}
-
-static int write_header(struct pstore *ps)
-{
-	struct disk_header *dh;
-
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-
-	dh = (struct disk_header *) ps->area;
-	dh->magic = cpu_to_le32(SNAP_MAGIC);
-	dh->valid = cpu_to_le32(ps->valid);
-	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
-
-	return chunk_io(ps, 0, WRITE, 1);
-}
-
-/*
- * Access functions for the disk exceptions, these do the endian conversions.
- */
-static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
-{
-	BUG_ON(index >= ps->exceptions_per_area);
-
-	return ((struct disk_exception *) ps->area) + index;
-}
 
-static void read_exception(struct pstore *ps,
-			   uint32_t index, struct disk_exception *result)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	result->old_chunk = le64_to_cpu(e->old_chunk);
-	result->new_chunk = le64_to_cpu(e->new_chunk);
-}
-
-static void write_exception(struct pstore *ps,
-			    uint32_t index, struct disk_exception *de)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	e->old_chunk = cpu_to_le64(de->old_chunk);
-	e->new_chunk = cpu_to_le64(de->new_chunk);
-}
+#define DM_MSG_PREFIX "snapshot exception stores"
 
-/*
- * Registers the exceptions that are present in the current area.
- * 'full' is filled in to indicate if the area has been
- * filled.
- */
-static int insert_exceptions(struct pstore *ps, int *full)
+int dm_exception_store_init(void)
 {
 	int r;
-	unsigned int i;
-	struct disk_exception de;
-
-	/* presume the area is full */
-	*full = 1;
-
-	for (i = 0; i < ps->exceptions_per_area; i++) {
-		read_exception(ps, i, &de);
-
-		/*
-		 * If the new_chunk is pointing at the start of
-		 * the COW device, where the first metadata area
-		 * is we know that we've hit the end of the
-		 * exceptions.  Therefore the area is not full.
-		 */
-		if (de.new_chunk == 0LL) {
-			ps->current_committed = i;
-			*full = 0;
-			break;
-		}
-
-		/*
-		 * Keep track of the start of the free chunks.
-		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
-
-		/*
-		 * Otherwise we add the exception to the snapshot.
-		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-static int read_exceptions(struct pstore *ps)
-{
-	int r, full = 1;
-
-	/*
-	 * Keeping reading chunks and inserting exceptions until
-	 * we find a partially full area.
-	 */
-	for (ps->current_area = 0; full; ps->current_area++) {
-		r = area_io(ps, READ);
-		if (r)
-			return r;
 
-		r = insert_exceptions(ps, &full);
-		if (r)
-			return r;
+	r = dm_transient_snapshot_init();
+	if (r) {
+		DMERR("Unable to register transient exception store type.");
+		goto transient_fail;
 	}
 
-	ps->current_area--;
-
-	return 0;
-}
-
-static struct pstore *get_info(struct exception_store *store)
-{
-	return (struct pstore *) store->context;
-}
-
-static void persistent_fraction_full(struct exception_store *store,
-				     sector_t *numerator, sector_t *denominator)
-{
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
-
-static void persistent_destroy(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
-	free_area(ps);
-	kfree(ps);
-}
-
-static int persistent_read_metadata(struct exception_store *store)
-{
-	int r, uninitialized_var(new_snapshot);
-	struct pstore *ps = get_info(store);
-
-	/*
-	 * Read the snapshot header.
-	 */
-	r = read_header(ps, &new_snapshot);
-	if (r)
-		return r;
-
-	/*
-	 * Now we know correct chunk_size, complete the initialisation.
-	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
-				  sizeof(struct disk_exception);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-			sizeof(*ps->callbacks));
-	if (!ps->callbacks)
-		return -ENOMEM;
-
-	/*
-	 * Do we need to setup a new snapshot ?
-	 */
-	if (new_snapshot) {
-		r = write_header(ps);
-		if (r) {
-			DMWARN("write_header failed");
-			return r;
-		}
-
-		ps->current_area = 0;
-		zero_memory_area(ps);
-		r = zero_disk_area(ps, 0);
-		if (r) {
-			DMWARN("zero_disk_area(0) failed");
-			return r;
-		}
-	} else {
-		/*
-		 * Sanity checks.
-		 */
-		if (ps->version != SNAPSHOT_DISK_VERSION) {
-			DMWARN("unable to handle snapshot disk version %d",
-			       ps->version);
-			return -EINVAL;
-		}
-
-		/*
-		 * Metadata are valid, but snapshot is invalidated
-		 */
-		if (!ps->valid)
-			return 1;
-
-		/*
-		 * Read the metadata.
-		 */
-		r = read_exceptions(ps);
-		if (r)
-			return r;
+	r = dm_persistent_snapshot_init();
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		goto persistent_fail;
 	}
 
 	return 0;
-}
-
-static int persistent_prepare(struct exception_store *store,
-			      struct dm_snap_exception *e)
-{
-	struct pstore *ps = get_info(store);
-	uint32_t stride;
-	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
-		return -ENOSPC;
 
-	e->new_chunk = ps->next_free;
-
-	/*
-	 * Move onto the next free pending, making sure to take
-	 * into account the location of the metadata chunks.
-	 */
-	stride = (ps->exceptions_per_area + 1);
-	next_free = ++ps->next_free;
-	if (sector_div(next_free, stride) == 1)
-		ps->next_free++;
-
-	atomic_inc(&ps->pending_count);
-	return 0;
-}
-
-static void persistent_commit(struct exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
-{
-	unsigned int i;
-	struct pstore *ps = get_info(store);
-	struct disk_exception de;
-	struct commit_callback *cb;
-
-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
-
-	/*
-	 * Add the callback to the back of the array.  This code
-	 * is the only place where the callback array is
-	 * manipulated, and we know that it will never be called
-	 * multiple times concurrently.
-	 */
-	cb = ps->callbacks + ps->callback_count++;
-	cb->callback = callback;
-	cb->context = callback_context;
-
-	/*
-	 * If there are exceptions in flight and we have not yet
-	 * filled this metadata area there's nothing more to do.
-	 */
-	if (!atomic_dec_and_test(&ps->pending_count) &&
-	    (ps->current_committed != ps->exceptions_per_area))
-		return;
-
-	/*
-	 * If we completely filled the current area, then wipe the next one.
-	 */
-	if ((ps->current_committed == ps->exceptions_per_area) &&
-	     zero_disk_area(ps, ps->current_area + 1))
-		ps->valid = 0;
-
-	/*
-	 * Commit exceptions to disk.
-	 */
-	if (ps->valid && area_io(ps, WRITE))
-		ps->valid = 0;
-
-	/*
-	 * Advance to the next area if this one is full.
-	 */
-	if (ps->current_committed == ps->exceptions_per_area) {
-		ps->current_committed = 0;
-		ps->current_area++;
-		zero_memory_area(ps);
-	}
-
-	for (i = 0; i < ps->callback_count; i++) {
-		cb = ps->callbacks + i;
-		cb->callback(cb->context, ps->valid);
-	}
-
-	ps->callback_count = 0;
-}
-
-static void persistent_drop(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	ps->valid = 0;
-	if (write_header(ps))
-		DMWARN("write header failed");
-}
-
-int dm_create_persistent(struct exception_store *store)
-{
-	struct pstore *ps;
-
-	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		return -ENOMEM;
-
-	ps->snap = store->snap;
-	ps->valid = 1;
-	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->area = NULL;
-	ps->next_free = 2;	/* skipping the header and first area */
-	ps->current_committed = 0;
-
-	ps->callback_count = 0;
-	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = NULL;
-
-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
-	if (!ps->metadata_wq) {
-		kfree(ps);
-		DMERR("couldn't start header metadata update thread");
-		return -ENOMEM;
-	}
-
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
-	store->fraction_full = persistent_fraction_full;
-	store->context = ps;
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the store for non-persistent snapshots.
- *---------------------------------------------------------------*/
-struct transient_c {
-	sector_t next_free;
-};
-
-static void transient_destroy(struct exception_store *store)
-{
-	kfree(store->context);
-}
-
-static int transient_read_metadata(struct exception_store *store)
-{
-	return 0;
-}
-
-static int transient_prepare(struct exception_store *store,
-			     struct dm_snap_exception *e)
-{
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	if (size < (tc->next_free + store->snap->chunk_size))
-		return -1;
-
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
-
-	return 0;
-}
-
-static void transient_commit(struct exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
-{
-	/* Just succeed */
-	callback(callback_context, 1);
-}
-
-static void transient_fraction_full(struct exception_store *store,
-				    sector_t *numerator, sector_t *denominator)
-{
-	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+persistent_fail:
+	dm_persistent_snapshot_exit();
+transient_fail:
+	return r;
 }
 
-int dm_create_transient(struct exception_store *store)
+void dm_exception_store_exit(void)
 {
-	struct transient_c *tc;
-
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
-	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->next_free = 0;
-	store->context = tc;
-
-	return 0;
+	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
 }
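After the split, dm-exception-store.c is reduced to registering and unregistering the two store implementations, unwinding in reverse order on failure. A hedged sketch of how the snapshot target's module init/exit would bracket these calls (assumption: the real call sites live in dm-snap.c, which this merge does not show):

/*
 * Sketch, not the actual dm-snap.c code: bracket the exception
 * store registration around the rest of module initialisation.
 */
static int __init dm_snapshot_init(void)
{
	int r;

	r = dm_exception_store_init();	/* registers transient + persistent */
	if (r)
		return r;

	/* ... register targets and create caches here ... */

	return 0;
}

static void __exit dm_snapshot_exit(void)
{
	/* ... tear down targets and caches here ... */
	dm_exception_store_exit();	/* unregisters both store types */
}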
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
new file mode 100644
index 000000000000..bb9f33d5daa2
--- /dev/null
+++ b/drivers/md/dm-exception-store.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-mapper snapshot exception store.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_EXCEPTION_STORE
+#define _LINUX_DM_EXCEPTION_STORE
+
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 32k - 512k.
+ */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
+ * of chunks that follow contiguously. Remaining bits hold the number of the
+ * chunk within the device.
+ */
+struct dm_snap_exception {
+	struct list_head hash_list;
+
+	chunk_t old_chunk;
+	chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct dm_exception_store {
+	/*
+	 * Destroys this object when you've finished with it.
+	 */
+	void (*destroy) (struct dm_exception_store *store);
+
+	/*
+	 * The target shouldn't read the COW device until this is
+	 * called.  As exceptions are read from the COW, they are
+	 * reported back via the callback.
+	 */
+	int (*read_metadata) (struct dm_exception_store *store,
+			      int (*callback)(void *callback_context,
+					      chunk_t old, chunk_t new),
+			      void *callback_context);
+
+	/*
+	 * Find somewhere to store the next exception.
+	 */
+	int (*prepare_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e);
+
+	/*
+	 * Update the metadata with this exception.
+	 */
+	void (*commit_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e,
+				  void (*callback) (void *, int success),
+				  void *callback_context);
+
+	/*
+	 * The snapshot is invalid, note this in the metadata.
+	 */
+	void (*drop_snapshot) (struct dm_exception_store *store);
+
+	int (*status) (struct dm_exception_store *store, status_type_t status,
+		       char *result, unsigned int maxlen);
+
+	/*
+	 * Return how full the snapshot is.
+	 */
+	void (*fraction_full) (struct dm_exception_store *store,
+			       sector_t *numerator,
+			       sector_t *denominator);
+
+	struct dm_snapshot *snap;
+	void *context;
+};
+
+/*
+ * Funtions to manipulate consecutive chunks
+ */
+#  if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#    define DM_CHUNK_CONSECUTIVE_BITS 8
+#    define DM_CHUNK_NUMBER_BITS 56
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+	e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
+
+	BUG_ON(!dm_consecutive_chunk_count(e));
+}
+
+#  else
+#    define DM_CHUNK_CONSECUTIVE_BITS 0
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk;
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return 0;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+}
+
+#  endif
+
+int dm_exception_store_init(void);
+void dm_exception_store_exit(void);
+
+/*
+ * Two exception store implementations.
+ */
+int dm_persistent_snapshot_init(void);
+void dm_persistent_snapshot_exit(void);
+
+int dm_transient_snapshot_init(void);
+void dm_transient_snapshot_exit(void);
+
+int dm_create_persistent(struct dm_exception_store *store);
+
+int dm_create_transient(struct dm_exception_store *store);
+
+#endif /* _LINUX_DM_EXCEPTION_STORE */
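The consecutive-chunk helpers above pack a run length into the top DM_CHUNK_CONSECUTIVE_BITS of new_chunk: incrementing by (1ULL << DM_CHUNK_NUMBER_BITS) bumps the count while leaving the low 56-bit chunk number untouched. A self-contained userspace sketch of the same arithmetic, with the constants inlined (illustrative only, not part of the merge):

#include <stdint.h>
#include <stdio.h>

/* Mirror of the 64-bit layout above: low 56 bits hold the chunk
 * number, the high 8 bits count chunks that follow contiguously. */
#define NUMBER_BITS 56

static uint64_t chunk_number(uint64_t new_chunk)
{
	return new_chunk & ((1ULL << NUMBER_BITS) - 1);
}

static unsigned consecutive_count(uint64_t new_chunk)
{
	return (unsigned)(new_chunk >> NUMBER_BITS);
}

int main(void)
{
	uint64_t e = 1000;		/* exception starts at chunk 1000 */

	e += 1ULL << NUMBER_BITS;	/* chunk 1001 is contiguous */
	e += 1ULL << NUMBER_BITS;	/* so is chunk 1002 */

	/* prints: chunk 1000, 2 consecutive */
	printf("chunk %llu, %u consecutive\n",
	       (unsigned long long)chunk_number(e), consecutive_count(e));
	return 0;
}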
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 777c948180f9..54d0588fc1f6 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	dm_put(hc->md);
 	free_cell(hc);
 }
@@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
 
 		r = dm_swap_table(md, new_map);
 		if (r) {
+			dm_table_destroy(new_map);
 			dm_put(md);
-			dm_table_put(new_map);
 			return r;
 		}
 
@@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
 			set_disk_ro(dm_disk(md), 0);
 		else
 			set_disk_ro(dm_disk(md), 1);
-
-		dm_table_put(new_map);
 	}
 
 	if (dm_suspended(md))
@@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
@@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map) {
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 		hc->new_map = NULL;
 	}
 
@@ -1550,8 +1548,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 		goto out;
 	}
 
-	strcpy(name, hc->name);
-	strcpy(uuid, hc->uuid ? : "");
+	if (name)
+		strcpy(name, hc->name);
+	if (uuid)
+		strcpy(uuid, hc->uuid ? : "");
 
 out:
 	up_read(&_hash_lock);
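The final dm-ioctl hunk implements "dm ioctl: allow dm_copy_name_and_uuid to return only one field": either output pointer may now be NULL, and that copy is simply skipped. A hedged fragment showing a caller that wants only the name (the surrounding code is illustrative; DM_NAME_LEN is the usual buffer size for dm names):

/* Sketch of a post-change caller: a NULL uuid means "name only". */
char name[DM_NAME_LEN];
int r = dm_copy_name_and_uuid(md, name, NULL);
if (!r)
	DMINFO("mapped device is named %s", name);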
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 44042becad8a..bfa107f59d96 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,6 +142,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl  = linear_ioctl,
 	.merge  = linear_merge,
+	.features = DM_TARGET_SUPPORTS_BARRIERS,
 };
 
 int __init dm_linear_init(void)
@@ -156,8 +157,5 @@ int __init dm_linear_init(void)
 
 void dm_linear_exit(void)
 {
-	int r = dm_unregister_target(&linear_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&linear_target);
 }
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a8c0fc79ca78..737961f275c1 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
 static int rw_header(struct log_c *lc, int rw)
 {
 	lc->io_req.bi_rw = rw;
-	lc->io_req.mem.ptr.vma = lc->disk_header;
-	lc->io_req.notify.fn = NULL;
 
 	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -362,10 +360,15 @@ static int read_header(struct log_c *log)
 	return 0;
 }
 
-static inline int write_header(struct log_c *log)
+static int _check_region_size(struct dm_target *ti, uint32_t region_size)
 {
-	header_to_disk(&log->header, log->disk_header);
-	return rw_header(log, WRITE);
+	if (region_size < 2 || region_size > ti->len)
+		return 0;
+
+	if (!is_power_of_2(region_size))
+		return 0;
+
+	return 1;
 }
 
 /*----------------------------------------------------------------
@@ -403,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		}
 	}
 
-	if (sscanf(argv[0], "%u", &region_size) != 1) {
-		DMWARN("invalid region size string");
+	if (sscanf(argv[0], "%u", &region_size) != 1 ||
+	    !_check_region_size(ti, region_size)) {
+		DMWARN("invalid region size %s", argv[0]);
 		return -EINVAL;
 	}
 
@@ -453,8 +457,18 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
 				       bitset_size, ti->limits.hardsect_size);
+
+		if (buf_size > dev->bdev->bd_inode->i_size) {
+			DMWARN("log device %s too small: need %llu bytes",
+			       dev->name, (unsigned long long)buf_size);
+			kfree(lc);
+			return -EINVAL;
+		}
+
 		lc->header_location.count = buf_size >> SECTOR_SHIFT;
+
 		lc->io_req.mem.type = DM_IO_VMA;
+		lc->io_req.notify.fn = NULL;
 		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
 								   PAGE_SIZE));
 		if (IS_ERR(lc->io_req.client)) {
@@ -467,10 +481,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
 			DMWARN("couldn't allocate disk log buffer");
+			dm_io_client_destroy(lc->io_req.client);
 			kfree(lc);
 			return -ENOMEM;
 		}
 
+		lc->io_req.mem.ptr.vma = lc->disk_header;
 		lc->clean_bits = (void *)lc->disk_header +
 				 (LOG_OFFSET << SECTOR_SHIFT);
 	}
@@ -482,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		DMWARN("couldn't allocate sync bitset");
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -495,6 +513,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		vfree(lc->sync_bits);
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -631,8 +651,10 @@ static int disk_resume(struct dm_dirty_log *log)
 	/* set the correct number of regions in the header */
 	lc->header.nr_regions = lc->region_count;
 
+	header_to_disk(&lc->header, lc->disk_header);
+
 	/* write the new header */
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r) {
 		DMWARN("%s: Failed to write header on dirty region log device",
 		       lc->log_dev->name);
@@ -682,7 +704,7 @@ static int disk_flush(struct dm_dirty_log *log)
 	if (!lc->touched)
 		return 0;
 
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r)
 		fail_log_device(lc);
 	else
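The new _check_region_size() in dm-log accepts a region size only if it is at least 2 sectors, no larger than the target, and a power of two; the page-alignment test that dm-raid1.c used to apply is dropped (see its diff below). The same predicate restated in isolation, as a standalone sketch:

#include <stdint.h>

/* Standalone restatement of _check_region_size(): at least 2
 * sectors, within the target length, and a power of two. */
static int region_size_ok(uint64_t ti_len, uint32_t region_size)
{
	if (region_size < 2 || region_size > ti_len)
		return 0;

	/* power-of-two test, equivalent to is_power_of_2() */
	return (region_size & (region_size - 1)) == 0;
}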
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3d7f4923cd13..095f77bf9681 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
 		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
@@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
 		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 }
 
 /*
@@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 	}
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	return 0;
 }
 
@@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
 
 static void __exit dm_multipath_exit(void)
 {
-	int r;
-
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("target unregister failed %d", r);
+	dm_unregister_target(&multipath_target);
 	kmem_cache_destroy(_mpio_cache);
 }
 
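Replacing queue_work(kmultipathd, &m->trigger_event) with schedule_work() moves event triggering onto the shared system workqueue, so kmultipathd can be flushed without waiting behind event work. A minimal sketch of the pattern (my_multipath stands in for struct multipath; illustrative only):

#include <linux/workqueue.h>

/* Sketch of the pattern this file now uses: event work runs on the
 * shared system workqueue rather than the subsystem's own queue. */
struct my_multipath {
	struct work_struct trigger_event;
};

static void trigger_event_fn(struct work_struct *work)
{
	/* raise the dm uevent here */
}

static void fire_event(struct my_multipath *m)
{
	INIT_WORK(&m->trigger_event, trigger_event_fn);
	schedule_work(&m->trigger_event);	/* not kmultipathd */
}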
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ec43f9fa4b2a..4d6bc101962e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	struct mirror_set *ms = m->ms;
 	struct mirror *new;
 
-	if (!errors_handled(ms))
-		return;
-
 	/*
 	 * error_count is used for nothing more than a
 	 * simple way to tell if a device has encountered
@@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	if (test_and_set_bit(error_type, &m->error_type))
 		return;
 
+	if (!errors_handled(ms))
+		return;
+
 	if (m != get_default_mirror(ms))
 		goto out;
 
@@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 	kfree(ms);
 }
 
-static inline int _check_region_size(struct dm_target *ti, uint32_t size)
-{
-	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
-		 size > ti->len);
-}
-
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		      unsigned int mirror, char **argv)
 {
@@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
 		return NULL;
 	}
 
-	if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
-		ti->error = "Invalid region size";
-		dm_dirty_log_destroy(dl);
-		return NULL;
-	}
-
 	return dl;
 }
 
@@ -1300,11 +1288,7 @@ static int __init dm_mirror_init(void)
 
 static void __exit dm_mirror_exit(void)
 {
-	int r;
-
-	r = dm_unregister_target(&mirror_target);
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&mirror_target);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c new file mode 100644 index 000000000000..936b34e0959f --- /dev/null +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include "dm-exception-store.h" | ||
9 | #include "dm-snap.h" | ||
10 | |||
11 | #include <linux/mm.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/vmalloc.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/dm-io.h> | ||
16 | |||
17 | #define DM_MSG_PREFIX "persistent snapshot" | ||
18 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ | ||
19 | |||
20 | /*----------------------------------------------------------------- | ||
21 | * Persistent snapshots, by persistent we mean that the snapshot | ||
22 | * will survive a reboot. | ||
23 | *---------------------------------------------------------------*/ | ||
24 | |||
25 | /* | ||
26 | * We need to store a record of which parts of the origin have | ||
27 | * been copied to the snapshot device. The snapshot code | ||
28 | * requires that we copy exception chunks to chunk aligned areas | ||
29 | * of the COW store. It makes sense therefore, to store the | ||
30 | * metadata in chunk size blocks. | ||
31 | * | ||
32 | * There is no backward or forward compatibility implemented, | ||
33 | * snapshots with different disk versions than the kernel will | ||
34 | * not be usable. It is expected that "lvcreate" will blank out | ||
35 | * the start of a fresh COW device before calling the snapshot | ||
36 | * constructor. | ||
37 | * | ||
38 | * The first chunk of the COW device just contains the header. | ||
39 | * After this there is a chunk filled with exception metadata, | ||
40 | * followed by as many exception chunks as can fit in the | ||
41 | * metadata areas. | ||
42 | * | ||
43 | * All on disk structures are in little-endian format. The end | ||
44 | * of the exceptions info is indicated by an exception with a | ||
45 | * new_chunk of 0, which is invalid since it would point to the | ||
46 | * header chunk. | ||
47 | */ | ||
48 | |||
49 | /* | ||
50 | * Magic for persistent snapshots: "SnAp" - Feeble isn't it. | ||
51 | */ | ||
52 | #define SNAP_MAGIC 0x70416e53 | ||
53 | |||
54 | /* | ||
55 | * The on-disk version of the metadata. | ||
56 | */ | ||
57 | #define SNAPSHOT_DISK_VERSION 1 | ||
58 | |||
59 | struct disk_header { | ||
60 | uint32_t magic; | ||
61 | |||
62 | /* | ||
63 | * Is this snapshot valid. There is no way of recovering | ||
64 | * an invalid snapshot. | ||
65 | */ | ||
66 | uint32_t valid; | ||
67 | |||
68 | /* | ||
69 | * Simple, incrementing version. no backward | ||
70 | * compatibility. | ||
71 | */ | ||
72 | uint32_t version; | ||
73 | |||
74 | /* In sectors */ | ||
75 | uint32_t chunk_size; | ||
76 | }; | ||
77 | |||
78 | struct disk_exception { | ||
79 | uint64_t old_chunk; | ||
80 | uint64_t new_chunk; | ||
81 | }; | ||
82 | |||
83 | struct commit_callback { | ||
84 | void (*callback)(void *, int success); | ||
85 | void *context; | ||
86 | }; | ||
87 | |||
88 | /* | ||
89 | * The top level structure for a persistent exception store. | ||
90 | */ | ||
91 | struct pstore { | ||
92 | struct dm_snapshot *snap; /* up pointer to my snapshot */ | ||
93 | int version; | ||
94 | int valid; | ||
95 | uint32_t exceptions_per_area; | ||
96 | |||
97 | /* | ||
98 | * Now that we have an asynchronous kcopyd there is no | ||
99 | * need for large chunk sizes, so it wont hurt to have a | ||
100 | * whole chunks worth of metadata in memory at once. | ||
101 | */ | ||
102 | void *area; | ||
103 | |||
104 | /* | ||
105 | * An area of zeros used to clear the next area. | ||
106 | */ | ||
107 | void *zero_area; | ||
108 | |||
109 | /* | ||
110 | * Used to keep track of which metadata area the data in | ||
111 | * 'chunk' refers to. | ||
112 | */ | ||
113 | chunk_t current_area; | ||
114 | |||
115 | /* | ||
116 | * The next free chunk for an exception. | ||
117 | */ | ||
118 | chunk_t next_free; | ||
119 | |||
120 | /* | ||
121 | * The index of next free exception in the current | ||
122 | * metadata area. | ||
123 | */ | ||
124 | uint32_t current_committed; | ||
125 | |||
126 | atomic_t pending_count; | ||
127 | uint32_t callback_count; | ||
128 | struct commit_callback *callbacks; | ||
129 | struct dm_io_client *io_client; | ||
130 | |||
131 | struct workqueue_struct *metadata_wq; | ||
132 | }; | ||
133 | |||
134 | static unsigned sectors_to_pages(unsigned sectors) | ||
135 | { | ||
136 | return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); | ||
137 | } | ||
138 | |||
139 | static int alloc_area(struct pstore *ps) | ||
140 | { | ||
141 | int r = -ENOMEM; | ||
142 | size_t len; | ||
143 | |||
144 | len = ps->snap->chunk_size << SECTOR_SHIFT; | ||
145 | |||
146 | /* | ||
147 | * Allocate the chunk_size block of memory that will hold | ||
148 | * a single metadata area. | ||
149 | */ | ||
150 | ps->area = vmalloc(len); | ||
151 | if (!ps->area) | ||
152 | return r; | ||
153 | |||
154 | ps->zero_area = vmalloc(len); | ||
155 | if (!ps->zero_area) { | ||
156 | vfree(ps->area); | ||
157 | return r; | ||
158 | } | ||
159 | memset(ps->zero_area, 0, len); | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static void free_area(struct pstore *ps) | ||
165 | { | ||
166 | vfree(ps->area); | ||
167 | ps->area = NULL; | ||
168 | vfree(ps->zero_area); | ||
169 | ps->zero_area = NULL; | ||
170 | } | ||
171 | |||
172 | struct mdata_req { | ||
173 | struct dm_io_region *where; | ||
174 | struct dm_io_request *io_req; | ||
175 | struct work_struct work; | ||
176 | int result; | ||
177 | }; | ||
178 | |||
179 | static void do_metadata(struct work_struct *work) | ||
180 | { | ||
181 | struct mdata_req *req = container_of(work, struct mdata_req, work); | ||
182 | |||
183 | req->result = dm_io(req->io_req, 1, req->where, NULL); | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Read or write a chunk aligned and sized block of data from a device. | ||
188 | */ | ||
189 | static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) | ||
190 | { | ||
191 | struct dm_io_region where = { | ||
192 | .bdev = ps->snap->cow->bdev, | ||
193 | .sector = ps->snap->chunk_size * chunk, | ||
194 | .count = ps->snap->chunk_size, | ||
195 | }; | ||
196 | struct dm_io_request io_req = { | ||
197 | .bi_rw = rw, | ||
198 | .mem.type = DM_IO_VMA, | ||
199 | .mem.ptr.vma = ps->area, | ||
200 | .client = ps->io_client, | ||
201 | .notify.fn = NULL, | ||
202 | }; | ||
203 | struct mdata_req req; | ||
204 | |||
205 | if (!metadata) | ||
206 | return dm_io(&io_req, 1, &where, NULL); | ||
207 | |||
208 | req.where = &where; | ||
209 | req.io_req = &io_req; | ||
210 | |||
211 | /* | ||
212 | * Issue the synchronous I/O from a different thread | ||
213 | * to avoid generic_make_request recursion. | ||
214 | */ | ||
215 | INIT_WORK(&req.work, do_metadata); | ||
216 | queue_work(ps->metadata_wq, &req.work); | ||
217 | flush_workqueue(ps->metadata_wq); | ||
218 | |||
219 | return req.result; | ||
220 | } | ||
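The workqueue dance in chunk_io() is a reusable pattern: bounce a blocking operation onto a dedicated thread and wait for it, so the submitting context never re-enters generic_make_request. A minimal sketch of the same pattern — perform_blocking_io() is a hypothetical stand-in for the dm_io() call, and my_wq is assumed to have been created with create_singlethread_workqueue():

	extern int perform_blocking_io(void);	/* hypothetical stand-in */

	struct sync_req {
		struct work_struct work;
		int result;
	};

	static void do_sync_work(struct work_struct *work)
	{
		struct sync_req *req = container_of(work, struct sync_req, work);

		req->result = perform_blocking_io();
	}

	static int run_synchronously(struct workqueue_struct *my_wq)
	{
		struct sync_req req;	/* on-stack is safe: we flush below */

		INIT_WORK(&req.work, do_sync_work);
		queue_work(my_wq, &req.work);
		flush_workqueue(my_wq);	/* wait until do_sync_work() has run */

		return req.result;
	}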
221 | |||
222 | /* | ||
223 | * Convert a metadata area index to a chunk index. | ||
224 | */ | ||
225 | static chunk_t area_location(struct pstore *ps, chunk_t area) | ||
226 | { | ||
227 | return 1 + ((ps->exceptions_per_area + 1) * area); | ||
228 | } | ||
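The arithmetic in area_location() pins down the on-disk layout of the COW device. Sketched as a comment, assuming exceptions_per_area = 4 for concreteness:

	/*
	 * chunk 0:      header (struct disk_header)
	 * chunk 1:      metadata for area 0   <- area_location(ps, 0)
	 * chunks 2..5:  exception data for area 0
	 * chunk 6:      metadata for area 1   <- area_location(ps, 1)
	 * chunks 7..10: exception data for area 1
	 * ...
	 *
	 * Metadata chunks are exactly those with
	 * chunk % (exceptions_per_area + 1) == 1, which is why
	 * dm_create_persistent() starts next_free at 2 and
	 * persistent_prepare_exception() skips such chunks.
	 */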
229 | |||
230 | /* | ||
231 | * Read or write a metadata area, remembering to skip the first | ||
232 | * chunk, which holds the header. | ||
233 | */ | ||
234 | static int area_io(struct pstore *ps, int rw) | ||
235 | { | ||
236 | int r; | ||
237 | chunk_t chunk; | ||
238 | |||
239 | chunk = area_location(ps, ps->current_area); | ||
240 | |||
241 | r = chunk_io(ps, chunk, rw, 0); | ||
242 | if (r) | ||
243 | return r; | ||
244 | |||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | static void zero_memory_area(struct pstore *ps) | ||
249 | { | ||
250 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
251 | } | ||
252 | |||
253 | static int zero_disk_area(struct pstore *ps, chunk_t area) | ||
254 | { | ||
255 | struct dm_io_region where = { | ||
256 | .bdev = ps->snap->cow->bdev, | ||
257 | .sector = ps->snap->chunk_size * area_location(ps, area), | ||
258 | .count = ps->snap->chunk_size, | ||
259 | }; | ||
260 | struct dm_io_request io_req = { | ||
261 | .bi_rw = WRITE, | ||
262 | .mem.type = DM_IO_VMA, | ||
263 | .mem.ptr.vma = ps->zero_area, | ||
264 | .client = ps->io_client, | ||
265 | .notify.fn = NULL, | ||
266 | }; | ||
267 | |||
268 | return dm_io(&io_req, 1, &where, NULL); | ||
269 | } | ||
270 | |||
271 | static int read_header(struct pstore *ps, int *new_snapshot) | ||
272 | { | ||
273 | int r; | ||
274 | struct disk_header *dh; | ||
275 | chunk_t chunk_size; | ||
276 | int chunk_size_supplied = 1; | ||
277 | |||
278 | /* | ||
279 | * Use default chunk size (or hardsect_size, if larger) if none supplied | ||
280 | */ | ||
281 | if (!ps->snap->chunk_size) { | ||
282 | ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, | ||
283 | bdev_hardsect_size(ps->snap->cow->bdev) >> 9); | ||
284 | ps->snap->chunk_mask = ps->snap->chunk_size - 1; | ||
285 | ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; | ||
286 | chunk_size_supplied = 0; | ||
287 | } | ||
288 | |||
289 | ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> | ||
290 | chunk_size)); | ||
291 | if (IS_ERR(ps->io_client)) | ||
292 | return PTR_ERR(ps->io_client); | ||
293 | |||
294 | r = alloc_area(ps); | ||
295 | if (r) | ||
296 | return r; | ||
297 | |||
298 | r = chunk_io(ps, 0, READ, 1); | ||
299 | if (r) | ||
300 | goto bad; | ||
301 | |||
302 | dh = (struct disk_header *) ps->area; | ||
303 | |||
304 | if (le32_to_cpu(dh->magic) == 0) { | ||
305 | *new_snapshot = 1; | ||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { | ||
310 | DMWARN("Invalid or corrupt snapshot"); | ||
311 | r = -ENXIO; | ||
312 | goto bad; | ||
313 | } | ||
314 | |||
315 | *new_snapshot = 0; | ||
316 | ps->valid = le32_to_cpu(dh->valid); | ||
317 | ps->version = le32_to_cpu(dh->version); | ||
318 | chunk_size = le32_to_cpu(dh->chunk_size); | ||
319 | |||
320 | if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) | ||
321 | return 0; | ||
322 | |||
323 | DMWARN("chunk size %llu in device metadata overrides " | ||
324 | "table chunk size of %llu.", | ||
325 | (unsigned long long)chunk_size, | ||
326 | (unsigned long long)ps->snap->chunk_size); | ||
327 | |||
328 | /* We had a bogus chunk_size. Fix stuff up. */ | ||
329 | free_area(ps); | ||
330 | |||
331 | ps->snap->chunk_size = chunk_size; | ||
332 | ps->snap->chunk_mask = chunk_size - 1; | ||
333 | ps->snap->chunk_shift = ffs(chunk_size) - 1; | ||
334 | |||
335 | r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), | ||
336 | ps->io_client); | ||
337 | if (r) | ||
338 | return r; | ||
339 | |||
340 | r = alloc_area(ps); | ||
341 | return r; | ||
342 | |||
343 | bad: | ||
344 | free_area(ps); | ||
345 | return r; | ||
346 | } | ||
347 | |||
348 | static int write_header(struct pstore *ps) | ||
349 | { | ||
350 | struct disk_header *dh; | ||
351 | |||
352 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
353 | |||
354 | dh = (struct disk_header *) ps->area; | ||
355 | dh->magic = cpu_to_le32(SNAP_MAGIC); | ||
356 | dh->valid = cpu_to_le32(ps->valid); | ||
357 | dh->version = cpu_to_le32(ps->version); | ||
358 | dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); | ||
359 | |||
360 | return chunk_io(ps, 0, WRITE, 1); | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * Access functions for the disk exceptions, these do the endian conversions. | ||
365 | */ | ||
366 | static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | ||
367 | { | ||
368 | BUG_ON(index >= ps->exceptions_per_area); | ||
369 | |||
370 | return ((struct disk_exception *) ps->area) + index; | ||
371 | } | ||
372 | |||
373 | static void read_exception(struct pstore *ps, | ||
374 | uint32_t index, struct disk_exception *result) | ||
375 | { | ||
376 | struct disk_exception *e = get_exception(ps, index); | ||
377 | |||
378 | /* copy it */ | ||
379 | result->old_chunk = le64_to_cpu(e->old_chunk); | ||
380 | result->new_chunk = le64_to_cpu(e->new_chunk); | ||
381 | } | ||
382 | |||
383 | static void write_exception(struct pstore *ps, | ||
384 | uint32_t index, struct disk_exception *de) | ||
385 | { | ||
386 | struct disk_exception *e = get_exception(ps, index); | ||
387 | |||
388 | /* copy it */ | ||
389 | e->old_chunk = cpu_to_le64(de->old_chunk); | ||
390 | e->new_chunk = cpu_to_le64(de->new_chunk); | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * Registers the exceptions that are present in the current area. | ||
395 | * 'full' is filled in to indicate if the area has been | ||
396 | * filled. | ||
397 | */ | ||
398 | static int insert_exceptions(struct pstore *ps, | ||
399 | int (*callback)(void *callback_context, | ||
400 | chunk_t old, chunk_t new), | ||
401 | void *callback_context, | ||
402 | int *full) | ||
403 | { | ||
404 | int r; | ||
405 | unsigned int i; | ||
406 | struct disk_exception de; | ||
407 | |||
408 | /* presume the area is full */ | ||
409 | *full = 1; | ||
410 | |||
411 | for (i = 0; i < ps->exceptions_per_area; i++) { | ||
412 | read_exception(ps, i, &de); | ||
413 | |||
414 | /* | ||
415 | * If the new_chunk is pointing at the start of | ||
416 | * the COW device, where the first metadata area | ||
417 | * is we know that we've hit the end of the | ||
418 | * exceptions. Therefore the area is not full. | ||
419 | */ | ||
420 | if (de.new_chunk == 0LL) { | ||
421 | ps->current_committed = i; | ||
422 | *full = 0; | ||
423 | break; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Keep track of the start of the free chunks. | ||
428 | */ | ||
429 | if (ps->next_free <= de.new_chunk) | ||
430 | ps->next_free = de.new_chunk + 1; | ||
431 | |||
432 | /* | ||
433 | * Otherwise we add the exception to the snapshot. | ||
434 | */ | ||
435 | r = callback(callback_context, de.old_chunk, de.new_chunk); | ||
436 | if (r) | ||
437 | return r; | ||
438 | } | ||
439 | |||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | static int read_exceptions(struct pstore *ps, | ||
444 | int (*callback)(void *callback_context, chunk_t old, | ||
445 | chunk_t new), | ||
446 | void *callback_context) | ||
447 | { | ||
448 | int r, full = 1; | ||
449 | |||
450 | /* | ||
451 | * Keep reading chunks and inserting exceptions until | ||
452 | * we find a partially full area. | ||
453 | */ | ||
454 | for (ps->current_area = 0; full; ps->current_area++) { | ||
455 | r = area_io(ps, READ); | ||
456 | if (r) | ||
457 | return r; | ||
458 | |||
459 | r = insert_exceptions(ps, callback, callback_context, &full); | ||
460 | if (r) | ||
461 | return r; | ||
462 | } | ||
463 | |||
464 | ps->current_area--; | ||
465 | |||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | static struct pstore *get_info(struct dm_exception_store *store) | ||
470 | { | ||
471 | return (struct pstore *) store->context; | ||
472 | } | ||
473 | |||
474 | static void persistent_fraction_full(struct dm_exception_store *store, | ||
475 | sector_t *numerator, sector_t *denominator) | ||
476 | { | ||
477 | *numerator = get_info(store)->next_free * store->snap->chunk_size; | ||
478 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
479 | } | ||
480 | |||
481 | static void persistent_destroy(struct dm_exception_store *store) | ||
482 | { | ||
483 | struct pstore *ps = get_info(store); | ||
484 | |||
485 | destroy_workqueue(ps->metadata_wq); | ||
486 | dm_io_client_destroy(ps->io_client); | ||
487 | vfree(ps->callbacks); | ||
488 | free_area(ps); | ||
489 | kfree(ps); | ||
490 | } | ||
491 | |||
492 | static int persistent_read_metadata(struct dm_exception_store *store, | ||
493 | int (*callback)(void *callback_context, | ||
494 | chunk_t old, chunk_t new), | ||
495 | void *callback_context) | ||
496 | { | ||
497 | int r, uninitialized_var(new_snapshot); | ||
498 | struct pstore *ps = get_info(store); | ||
499 | |||
500 | /* | ||
501 | * Read the snapshot header. | ||
502 | */ | ||
503 | r = read_header(ps, &new_snapshot); | ||
504 | if (r) | ||
505 | return r; | ||
506 | |||
507 | /* | ||
508 | * Now we know correct chunk_size, complete the initialisation. | ||
509 | */ | ||
510 | ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / | ||
511 | sizeof(struct disk_exception); | ||
512 | ps->callbacks = dm_vcalloc(ps->exceptions_per_area, | ||
513 | sizeof(*ps->callbacks)); | ||
514 | if (!ps->callbacks) | ||
515 | return -ENOMEM; | ||
516 | |||
517 | /* | ||
518 | * Do we need to set up a new snapshot? | ||
519 | */ | ||
520 | if (new_snapshot) { | ||
521 | r = write_header(ps); | ||
522 | if (r) { | ||
523 | DMWARN("write_header failed"); | ||
524 | return r; | ||
525 | } | ||
526 | |||
527 | ps->current_area = 0; | ||
528 | zero_memory_area(ps); | ||
529 | r = zero_disk_area(ps, 0); | ||
530 | if (r) { | ||
531 | DMWARN("zero_disk_area(0) failed"); | ||
532 | return r; | ||
533 | } | ||
534 | } else { | ||
535 | /* | ||
536 | * Sanity checks. | ||
537 | */ | ||
538 | if (ps->version != SNAPSHOT_DISK_VERSION) { | ||
539 | DMWARN("unable to handle snapshot disk version %d", | ||
540 | ps->version); | ||
541 | return -EINVAL; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Metadata are valid, but snapshot is invalidated | ||
546 | */ | ||
547 | if (!ps->valid) | ||
548 | return 1; | ||
549 | |||
550 | /* | ||
551 | * Read the metadata. | ||
552 | */ | ||
553 | r = read_exceptions(ps, callback, callback_context); | ||
554 | if (r) | ||
555 | return r; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static int persistent_prepare_exception(struct dm_exception_store *store, | ||
562 | struct dm_snap_exception *e) | ||
563 | { | ||
564 | struct pstore *ps = get_info(store); | ||
565 | uint32_t stride; | ||
566 | chunk_t next_free; | ||
567 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
568 | |||
569 | /* Is there enough room? */ | ||
570 | if (size < ((ps->next_free + 1) * store->snap->chunk_size)) | ||
571 | return -ENOSPC; | ||
572 | |||
573 | e->new_chunk = ps->next_free; | ||
574 | |||
575 | /* | ||
576 | * Move on to the next free pending exception, making sure to take | ||
577 | * into account the location of the metadata chunks. | ||
578 | */ | ||
579 | stride = (ps->exceptions_per_area + 1); | ||
580 | next_free = ++ps->next_free; | ||
581 | if (sector_div(next_free, stride) == 1) | ||
582 | ps->next_free++; | ||
583 | |||
584 | atomic_inc(&ps->pending_count); | ||
585 | return 0; | ||
586 | } | ||
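The sector_div() test above skips allocations that would land on a metadata chunk. A simplified sketch of the same walk, using % in place of sector_div() (chunk_t is a sector_t, per dm-snap.h):

	/*
	 * With exceptions_per_area = 4 (stride = 5), data chunks are
	 * handed out as 2, 3, 4, 5, 7, ...: chunk 6 has 6 % 5 == 1,
	 * i.e. it is the metadata chunk of area 1, so it is skipped.
	 */
	static sector_t advance_next_free(sector_t next_free, uint32_t stride)
	{
		sector_t candidate = next_free + 1;

		if (candidate % stride == 1)	/* metadata chunk */
			candidate++;

		return candidate;
	}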
587 | |||
588 | static void persistent_commit_exception(struct dm_exception_store *store, | ||
589 | struct dm_snap_exception *e, | ||
590 | void (*callback) (void *, int success), | ||
591 | void *callback_context) | ||
592 | { | ||
593 | unsigned int i; | ||
594 | struct pstore *ps = get_info(store); | ||
595 | struct disk_exception de; | ||
596 | struct commit_callback *cb; | ||
597 | |||
598 | de.old_chunk = e->old_chunk; | ||
599 | de.new_chunk = e->new_chunk; | ||
600 | write_exception(ps, ps->current_committed++, &de); | ||
601 | |||
602 | /* | ||
603 | * Add the callback to the back of the array. This code | ||
604 | * is the only place where the callback array is | ||
605 | * manipulated, and we know that it will never be called | ||
606 | * multiple times concurrently. | ||
607 | */ | ||
608 | cb = ps->callbacks + ps->callback_count++; | ||
609 | cb->callback = callback; | ||
610 | cb->context = callback_context; | ||
611 | |||
612 | /* | ||
613 | * If there are exceptions in flight and we have not yet | ||
614 | * filled this metadata area, there's nothing more to do. | ||
615 | */ | ||
616 | if (!atomic_dec_and_test(&ps->pending_count) && | ||
617 | (ps->current_committed != ps->exceptions_per_area)) | ||
618 | return; | ||
619 | |||
620 | /* | ||
621 | * If we completely filled the current area, then wipe the next one. | ||
622 | */ | ||
623 | if ((ps->current_committed == ps->exceptions_per_area) && | ||
624 | zero_disk_area(ps, ps->current_area + 1)) | ||
625 | ps->valid = 0; | ||
626 | |||
627 | /* | ||
628 | * Commit exceptions to disk. | ||
629 | */ | ||
630 | if (ps->valid && area_io(ps, WRITE)) | ||
631 | ps->valid = 0; | ||
632 | |||
633 | /* | ||
634 | * Advance to the next area if this one is full. | ||
635 | */ | ||
636 | if (ps->current_committed == ps->exceptions_per_area) { | ||
637 | ps->current_committed = 0; | ||
638 | ps->current_area++; | ||
639 | zero_memory_area(ps); | ||
640 | } | ||
641 | |||
642 | for (i = 0; i < ps->callback_count; i++) { | ||
643 | cb = ps->callbacks + i; | ||
644 | cb->callback(cb->context, ps->valid); | ||
645 | } | ||
646 | |||
647 | ps->callback_count = 0; | ||
648 | } | ||
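Commits are batched: the early return above is a negated conjunction, so the metadata area is actually written only when the last in-flight exception drains or the area fills. The positive form of the condition, as a sketch:

	/*
	 * Write the area iff nothing is pending any more OR the
	 * current area has been completely filled.
	 */
	static int should_flush_area(int pending, uint32_t committed,
				     uint32_t per_area)
	{
		return pending == 0 || committed == per_area;
	}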
649 | |||
650 | static void persistent_drop_snapshot(struct dm_exception_store *store) | ||
651 | { | ||
652 | struct pstore *ps = get_info(store); | ||
653 | |||
654 | ps->valid = 0; | ||
655 | if (write_header(ps)) | ||
656 | DMWARN("write header failed"); | ||
657 | } | ||
658 | |||
659 | int dm_create_persistent(struct dm_exception_store *store) | ||
660 | { | ||
661 | struct pstore *ps; | ||
662 | |||
663 | /* allocate the pstore */ | ||
664 | ps = kmalloc(sizeof(*ps), GFP_KERNEL); | ||
665 | if (!ps) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | ps->snap = store->snap; | ||
669 | ps->valid = 1; | ||
670 | ps->version = SNAPSHOT_DISK_VERSION; | ||
671 | ps->area = NULL; | ||
672 | ps->next_free = 2; /* skipping the header and first area */ | ||
673 | ps->current_committed = 0; | ||
674 | |||
675 | ps->callback_count = 0; | ||
676 | atomic_set(&ps->pending_count, 0); | ||
677 | ps->callbacks = NULL; | ||
678 | |||
679 | ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); | ||
680 | if (!ps->metadata_wq) { | ||
681 | kfree(ps); | ||
682 | DMERR("couldn't start header metadata update thread"); | ||
683 | return -ENOMEM; | ||
684 | } | ||
685 | |||
686 | store->destroy = persistent_destroy; | ||
687 | store->read_metadata = persistent_read_metadata; | ||
688 | store->prepare_exception = persistent_prepare_exception; | ||
689 | store->commit_exception = persistent_commit_exception; | ||
690 | store->drop_snapshot = persistent_drop_snapshot; | ||
691 | store->fraction_full = persistent_fraction_full; | ||
692 | store->context = ps; | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | int dm_persistent_snapshot_init(void) | ||
698 | { | ||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | void dm_persistent_snapshot_exit(void) | ||
703 | { | ||
704 | } | ||
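Taken together, dm_create_persistent() and the function pointers it installs define the store's lifecycle. A sketch of how a caller drives it — load_cb is a placeholder; the real callback, dm_add_exception(), appears in the dm-snap.c hunk below:

	static int load_cb(void *context, chunk_t old, chunk_t new)
	{
		/* placeholder: insert into an in-core exception table */
		return 0;
	}

	static int use_persistent_store(struct dm_exception_store *store)
	{
		int r = dm_create_persistent(store);

		if (r)
			return r;

		/* < 0 is an error; 1 flags an invalidated snapshot */
		r = store->read_metadata(store, load_cb, NULL);
		if (r < 0) {
			store->destroy(store);
			return r;
		}

		/* ... prepare_exception()/commit_exception() during copy-out ... */
		return 0;
	}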
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c new file mode 100644 index 000000000000..7f6e2e6dcb0d --- /dev/null +++ b/drivers/md/dm-snap-transient.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include "dm-exception-store.h" | ||
9 | #include "dm-snap.h" | ||
10 | |||
11 | #include <linux/mm.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/vmalloc.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/dm-io.h> | ||
16 | |||
17 | #define DM_MSG_PREFIX "transient snapshot" | ||
18 | |||
19 | /*----------------------------------------------------------------- | ||
20 | * Implementation of the store for non-persistent snapshots. | ||
21 | *---------------------------------------------------------------*/ | ||
22 | struct transient_c { | ||
23 | sector_t next_free; | ||
24 | }; | ||
25 | |||
26 | static void transient_destroy(struct dm_exception_store *store) | ||
27 | { | ||
28 | kfree(store->context); | ||
29 | } | ||
30 | |||
31 | static int transient_read_metadata(struct dm_exception_store *store, | ||
32 | int (*callback)(void *callback_context, | ||
33 | chunk_t old, chunk_t new), | ||
34 | void *callback_context) | ||
35 | { | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int transient_prepare_exception(struct dm_exception_store *store, | ||
40 | struct dm_snap_exception *e) | ||
41 | { | ||
42 | struct transient_c *tc = (struct transient_c *) store->context; | ||
43 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
44 | |||
45 | if (size < (tc->next_free + store->snap->chunk_size)) | ||
46 | return -1; | ||
47 | |||
48 | e->new_chunk = sector_to_chunk(store->snap, tc->next_free); | ||
49 | tc->next_free += store->snap->chunk_size; | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static void transient_commit_exception(struct dm_exception_store *store, | ||
55 | struct dm_snap_exception *e, | ||
56 | void (*callback) (void *, int success), | ||
57 | void *callback_context) | ||
58 | { | ||
59 | /* Just succeed */ | ||
60 | callback(callback_context, 1); | ||
61 | } | ||
62 | |||
63 | static void transient_fraction_full(struct dm_exception_store *store, | ||
64 | sector_t *numerator, sector_t *denominator) | ||
65 | { | ||
66 | *numerator = ((struct transient_c *) store->context)->next_free; | ||
67 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
68 | } | ||
69 | |||
70 | int dm_create_transient(struct dm_exception_store *store) | ||
71 | { | ||
72 | struct transient_c *tc; | ||
73 | |||
74 | store->destroy = transient_destroy; | ||
75 | store->read_metadata = transient_read_metadata; | ||
76 | store->prepare_exception = transient_prepare_exception; | ||
77 | store->commit_exception = transient_commit_exception; | ||
78 | store->drop_snapshot = NULL; | ||
79 | store->fraction_full = transient_fraction_full; | ||
80 | |||
81 | tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); | ||
82 | if (!tc) | ||
83 | return -ENOMEM; | ||
84 | |||
85 | tc->next_free = 0; | ||
86 | store->context = tc; | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | int dm_transient_snapshot_init(void) | ||
92 | { | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | void dm_transient_snapshot_exit(void) | ||
97 | { | ||
98 | } | ||
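How a caller picks one of the two stores is outside this file. A plausible dispatch, sketched under the assumption that the snapshot constructor's 'P' (persistent) / 'N' (non-persistent) table argument is passed down as a single character; the helper name is made up:

	static int create_exception_store(char persistent,
					  struct dm_exception_store *store)
	{
		switch (persistent) {
		case 'P':
			return dm_create_persistent(store);
		case 'N':
			return dm_create_transient(store);
		default:
			return -EINVAL;
		}
	}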
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c96db26b87c..65ff82ff124e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
10 | #include <linux/ctype.h> | 10 | #include <linux/ctype.h> |
11 | #include <linux/device-mapper.h> | 11 | #include <linux/device-mapper.h> |
12 | #include <linux/delay.h> | ||
12 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
14 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/log2.h> | 21 | #include <linux/log2.h> |
21 | #include <linux/dm-kcopyd.h> | 22 | #include <linux/dm-kcopyd.h> |
22 | 23 | ||
24 | #include "dm-exception-store.h" | ||
23 | #include "dm-snap.h" | 25 | #include "dm-snap.h" |
24 | #include "dm-bio-list.h" | 26 | #include "dm-bio-list.h" |
25 | 27 | ||
@@ -428,8 +430,13 @@ out: | |||
428 | list_add(&new_e->hash_list, e ? &e->hash_list : l); | 430 | list_add(&new_e->hash_list, e ? &e->hash_list : l); |
429 | } | 431 | } |
430 | 432 | ||
431 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) | 433 | /* |
434 | * Callback used by the exception stores to load exceptions when | ||
435 | * initialising. | ||
436 | */ | ||
437 | static int dm_add_exception(void *context, chunk_t old, chunk_t new) | ||
432 | { | 438 | { |
439 | struct dm_snapshot *s = context; | ||
433 | struct dm_snap_exception *e; | 440 | struct dm_snap_exception *e; |
434 | 441 | ||
435 | e = alloc_exception(); | 442 | e = alloc_exception(); |
@@ -658,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
658 | spin_lock_init(&s->tracked_chunk_lock); | 665 | spin_lock_init(&s->tracked_chunk_lock); |
659 | 666 | ||
660 | /* Metadata must only be loaded into one table at once */ | 667 | /* Metadata must only be loaded into one table at once */ |
661 | r = s->store.read_metadata(&s->store); | 668 | r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); |
662 | if (r < 0) { | 669 | if (r < 0) { |
663 | ti->error = "Failed to read snapshot metadata"; | 670 | ti->error = "Failed to read snapshot metadata"; |
664 | goto bad_load_and_register; | 671 | goto bad_load_and_register; |
@@ -735,7 +742,7 @@ static void snapshot_dtr(struct dm_target *ti) | |||
735 | unregister_snapshot(s); | 742 | unregister_snapshot(s); |
736 | 743 | ||
737 | while (atomic_read(&s->pending_exceptions_count)) | 744 | while (atomic_read(&s->pending_exceptions_count)) |
738 | yield(); | 745 | msleep(1); |
739 | /* | 746 | /* |
740 | * Ensure instructions in mempool_destroy aren't reordered | 747 | * Ensure instructions in mempool_destroy aren't reordered |
741 | * before atomic_read. | 748 | * before atomic_read. |
@@ -888,10 +895,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
888 | 895 | ||
889 | /* | 896 | /* |
890 | * Check for conflicting reads. This is extremely improbable, | 897 | * Check for conflicting reads. This is extremely improbable, |
891 | * so yield() is sufficient and there is no need for a wait queue. | 898 | * so msleep(1) is sufficient and there is no need for a wait queue. |
892 | */ | 899 | */ |
893 | while (__chunk_is_tracked(s, pe->e.old_chunk)) | 900 | while (__chunk_is_tracked(s, pe->e.old_chunk)) |
894 | yield(); | 901 | msleep(1); |
895 | 902 | ||
896 | /* | 903 | /* |
897 | * Add a proper exception, and remove the | 904 | * Add a proper exception, and remove the |
@@ -1404,6 +1411,12 @@ static int __init dm_snapshot_init(void) | |||
1404 | { | 1411 | { |
1405 | int r; | 1412 | int r; |
1406 | 1413 | ||
1414 | r = dm_exception_store_init(); | ||
1415 | if (r) { | ||
1416 | DMERR("Failed to initialize exception stores"); | ||
1417 | return r; | ||
1418 | } | ||
1419 | |||
1407 | r = dm_register_target(&snapshot_target); | 1420 | r = dm_register_target(&snapshot_target); |
1408 | if (r) { | 1421 | if (r) { |
1409 | DMERR("snapshot target register failed %d", r); | 1422 | DMERR("snapshot target register failed %d", r); |
@@ -1452,39 +1465,34 @@ static int __init dm_snapshot_init(void) | |||
1452 | 1465 | ||
1453 | return 0; | 1466 | return 0; |
1454 | 1467 | ||
1455 | bad_pending_pool: | 1468 | bad_pending_pool: |
1456 | kmem_cache_destroy(tracked_chunk_cache); | 1469 | kmem_cache_destroy(tracked_chunk_cache); |
1457 | bad5: | 1470 | bad5: |
1458 | kmem_cache_destroy(pending_cache); | 1471 | kmem_cache_destroy(pending_cache); |
1459 | bad4: | 1472 | bad4: |
1460 | kmem_cache_destroy(exception_cache); | 1473 | kmem_cache_destroy(exception_cache); |
1461 | bad3: | 1474 | bad3: |
1462 | exit_origin_hash(); | 1475 | exit_origin_hash(); |
1463 | bad2: | 1476 | bad2: |
1464 | dm_unregister_target(&origin_target); | 1477 | dm_unregister_target(&origin_target); |
1465 | bad1: | 1478 | bad1: |
1466 | dm_unregister_target(&snapshot_target); | 1479 | dm_unregister_target(&snapshot_target); |
1467 | return r; | 1480 | return r; |
1468 | } | 1481 | } |
1469 | 1482 | ||
1470 | static void __exit dm_snapshot_exit(void) | 1483 | static void __exit dm_snapshot_exit(void) |
1471 | { | 1484 | { |
1472 | int r; | ||
1473 | |||
1474 | destroy_workqueue(ksnapd); | 1485 | destroy_workqueue(ksnapd); |
1475 | 1486 | ||
1476 | r = dm_unregister_target(&snapshot_target); | 1487 | dm_unregister_target(&snapshot_target); |
1477 | if (r) | 1488 | dm_unregister_target(&origin_target); |
1478 | DMERR("snapshot unregister failed %d", r); | ||
1479 | |||
1480 | r = dm_unregister_target(&origin_target); | ||
1481 | if (r) | ||
1482 | DMERR("origin unregister failed %d", r); | ||
1483 | 1489 | ||
1484 | exit_origin_hash(); | 1490 | exit_origin_hash(); |
1485 | kmem_cache_destroy(pending_cache); | 1491 | kmem_cache_destroy(pending_cache); |
1486 | kmem_cache_destroy(exception_cache); | 1492 | kmem_cache_destroy(exception_cache); |
1487 | kmem_cache_destroy(tracked_chunk_cache); | 1493 | kmem_cache_destroy(tracked_chunk_cache); |
1494 | |||
1495 | dm_exception_store_exit(); | ||
1488 | } | 1496 | } |
1489 | 1497 | ||
1490 | /* Module hooks */ | 1498 | /* Module hooks */ |
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 99c0106ede2d..d9e62b43cf85 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h | |||
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * dm-snapshot.c | ||
3 | * | ||
4 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
5 | * | 3 | * |
6 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
@@ -10,6 +8,7 @@ | |||
10 | #define DM_SNAPSHOT_H | 8 | #define DM_SNAPSHOT_H |
11 | 9 | ||
12 | #include <linux/device-mapper.h> | 10 | #include <linux/device-mapper.h> |
11 | #include "dm-exception-store.h" | ||
13 | #include "dm-bio-list.h" | 12 | #include "dm-bio-list.h" |
14 | #include <linux/blkdev.h> | 13 | #include <linux/blkdev.h> |
15 | #include <linux/workqueue.h> | 14 | #include <linux/workqueue.h> |
@@ -20,116 +19,6 @@ struct exception_table { | |||
20 | struct list_head *table; | 19 | struct list_head *table; |
21 | }; | 20 | }; |
22 | 21 | ||
23 | /* | ||
24 | * The snapshot code deals with largish chunks of the disk at a | ||
25 | * time. Typically 32k - 512k. | ||
26 | */ | ||
27 | typedef sector_t chunk_t; | ||
28 | |||
29 | /* | ||
30 | * An exception is used where an old chunk of data has been | ||
31 | * replaced by a new one. | ||
32 | * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number | ||
33 | * of chunks that follow contiguously. Remaining bits hold the number of the | ||
34 | * chunk within the device. | ||
35 | */ | ||
36 | struct dm_snap_exception { | ||
37 | struct list_head hash_list; | ||
38 | |||
39 | chunk_t old_chunk; | ||
40 | chunk_t new_chunk; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Functions to manipulate consecutive chunks | ||
45 | */ | ||
46 | # if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) | ||
47 | # define DM_CHUNK_CONSECUTIVE_BITS 8 | ||
48 | # define DM_CHUNK_NUMBER_BITS 56 | ||
49 | |||
50 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
51 | { | ||
52 | return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); | ||
53 | } | ||
54 | |||
55 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
56 | { | ||
57 | return e->new_chunk >> DM_CHUNK_NUMBER_BITS; | ||
58 | } | ||
59 | |||
60 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
61 | { | ||
62 | e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); | ||
63 | |||
64 | BUG_ON(!dm_consecutive_chunk_count(e)); | ||
65 | } | ||
66 | |||
67 | # else | ||
68 | # define DM_CHUNK_CONSECUTIVE_BITS 0 | ||
69 | |||
70 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
71 | { | ||
72 | return chunk; | ||
73 | } | ||
74 | |||
75 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
76 | { | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
81 | { | ||
82 | } | ||
83 | |||
84 | # endif | ||
85 | |||
86 | /* | ||
87 | * Abstraction to handle the meta/layout of exception stores (the | ||
88 | * COW device). | ||
89 | */ | ||
90 | struct exception_store { | ||
91 | |||
92 | /* | ||
93 | * Destroys this object when you've finished with it. | ||
94 | */ | ||
95 | void (*destroy) (struct exception_store *store); | ||
96 | |||
97 | /* | ||
98 | * The target shouldn't read the COW device until this is | ||
99 | * called. | ||
100 | */ | ||
101 | int (*read_metadata) (struct exception_store *store); | ||
102 | |||
103 | /* | ||
104 | * Find somewhere to store the next exception. | ||
105 | */ | ||
106 | int (*prepare_exception) (struct exception_store *store, | ||
107 | struct dm_snap_exception *e); | ||
108 | |||
109 | /* | ||
110 | * Update the metadata with this exception. | ||
111 | */ | ||
112 | void (*commit_exception) (struct exception_store *store, | ||
113 | struct dm_snap_exception *e, | ||
114 | void (*callback) (void *, int success), | ||
115 | void *callback_context); | ||
116 | |||
117 | /* | ||
118 | * The snapshot is invalid, note this in the metadata. | ||
119 | */ | ||
120 | void (*drop_snapshot) (struct exception_store *store); | ||
121 | |||
122 | /* | ||
123 | * Return how full the snapshot is. | ||
124 | */ | ||
125 | void (*fraction_full) (struct exception_store *store, | ||
126 | sector_t *numerator, | ||
127 | sector_t *denominator); | ||
128 | |||
129 | struct dm_snapshot *snap; | ||
130 | void *context; | ||
131 | }; | ||
132 | |||
133 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 | 22 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 |
134 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ | 23 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ |
135 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) | 24 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) |
@@ -172,7 +61,7 @@ struct dm_snapshot { | |||
172 | spinlock_t pe_lock; | 61 | spinlock_t pe_lock; |
173 | 62 | ||
174 | /* The on disk metadata handler */ | 63 | /* The on disk metadata handler */ |
175 | struct exception_store store; | 64 | struct dm_exception_store store; |
176 | 65 | ||
177 | struct dm_kcopyd_client *kcopyd_client; | 66 | struct dm_kcopyd_client *kcopyd_client; |
178 | 67 | ||
@@ -187,20 +76,6 @@ struct dm_snapshot { | |||
187 | }; | 76 | }; |
188 | 77 | ||
189 | /* | 78 | /* |
190 | * Used by the exception stores to load exceptions when | ||
191 | * initialising. | ||
192 | */ | ||
193 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); | ||
194 | |||
195 | /* | ||
196 | * Constructor and destructor for the default persistent | ||
197 | * store. | ||
198 | */ | ||
199 | int dm_create_persistent(struct exception_store *store); | ||
200 | |||
201 | int dm_create_transient(struct exception_store *store); | ||
202 | |||
203 | /* | ||
204 | * Return the number of sectors in the device. | 79 | * Return the number of sectors in the device. |
205 | */ | 80 | */ |
206 | static inline sector_t get_dev_size(struct block_device *bdev) | 81 | static inline sector_t get_dev_size(struct block_device *bdev) |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e4ef88d421e..41569bc60abc 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -337,9 +337,7 @@ int __init dm_stripe_init(void) | |||
337 | 337 | ||
338 | void dm_stripe_exit(void) | 338 | void dm_stripe_exit(void) |
339 | { | 339 | { |
340 | if (dm_unregister_target(&stripe_target)) | 340 | dm_unregister_target(&stripe_target); |
341 | DMWARN("target unregistration failed"); | ||
342 | |||
343 | destroy_workqueue(kstriped); | 341 | destroy_workqueue(kstriped); |
344 | 342 | ||
345 | return; | 343 | return; |
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c new file mode 100644 index 000000000000..a2a45e6c7c8b --- /dev/null +++ b/drivers/md/dm-sysfs.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc. All rights reserved. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/sysfs.h> | ||
8 | #include <linux/dm-ioctl.h> | ||
9 | #include "dm.h" | ||
10 | |||
11 | struct dm_sysfs_attr { | ||
12 | struct attribute attr; | ||
13 | ssize_t (*show)(struct mapped_device *, char *); | ||
14 | ssize_t (*store)(struct mapped_device *, char *); | ||
15 | }; | ||
16 | |||
17 | #define DM_ATTR_RO(_name) \ | ||
18 | struct dm_sysfs_attr dm_attr_##_name = \ | ||
19 | __ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL) | ||
20 | |||
21 | static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
22 | char *page) | ||
23 | { | ||
24 | struct dm_sysfs_attr *dm_attr; | ||
25 | struct mapped_device *md; | ||
26 | ssize_t ret; | ||
27 | |||
28 | dm_attr = container_of(attr, struct dm_sysfs_attr, attr); | ||
29 | if (!dm_attr->show) | ||
30 | return -EIO; | ||
31 | |||
32 | md = dm_get_from_kobject(kobj); | ||
33 | if (!md) | ||
34 | return -EINVAL; | ||
35 | |||
36 | ret = dm_attr->show(md, page); | ||
37 | dm_put(md); | ||
38 | |||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) | ||
43 | { | ||
44 | if (dm_copy_name_and_uuid(md, buf, NULL)) | ||
45 | return -EIO; | ||
46 | |||
47 | strcat(buf, "\n"); | ||
48 | return strlen(buf); | ||
49 | } | ||
50 | |||
51 | static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) | ||
52 | { | ||
53 | if (dm_copy_name_and_uuid(md, NULL, buf)) | ||
54 | return -EIO; | ||
55 | |||
56 | strcat(buf, "\n"); | ||
57 | return strlen(buf); | ||
58 | } | ||
59 | |||
60 | static DM_ATTR_RO(name); | ||
61 | static DM_ATTR_RO(uuid); | ||
62 | |||
63 | static struct attribute *dm_attrs[] = { | ||
64 | &dm_attr_name.attr, | ||
65 | &dm_attr_uuid.attr, | ||
66 | NULL, | ||
67 | }; | ||
68 | |||
69 | static struct sysfs_ops dm_sysfs_ops = { | ||
70 | .show = dm_attr_show, | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * The dm kobject is embedded in the mapped_device structure; | ||
75 | * there is no need to define a release function here. | ||
76 | */ | ||
77 | static struct kobj_type dm_ktype = { | ||
78 | .sysfs_ops = &dm_sysfs_ops, | ||
79 | .default_attrs = dm_attrs, | ||
80 | }; | ||
81 | |||
82 | /* | ||
83 | * Initialize kobj. | ||
84 | * Because nobody is using md yet, there is no need to call dm_get/dm_put explicitly. | ||
85 | */ | ||
86 | int dm_sysfs_init(struct mapped_device *md) | ||
87 | { | ||
88 | return kobject_init_and_add(dm_kobject(md), &dm_ktype, | ||
89 | &disk_to_dev(dm_disk(md))->kobj, | ||
90 | "%s", "dm"); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Remove kobj, called after all references removed | ||
95 | */ | ||
96 | void dm_sysfs_exit(struct mapped_device *md) | ||
97 | { | ||
98 | kobject_put(dm_kobject(md)); | ||
99 | } | ||
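From userspace the two attributes appear as /sys/block/dm-<minor>/dm/name and .../uuid ("dm" being the kobject name passed to kobject_init_and_add() above). A minimal reader, with the minor number (0) assumed for illustration:

	#include <stdio.h>

	int main(void)
	{
		char buf[128];
		FILE *f = fopen("/sys/block/dm-0/dm/name", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("dm name: %s", buf);	/* value ends in '\n' */
		fclose(f);
		return 0;
	}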
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 04e5fd742c2c..2fd66c30f7f8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2001 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
18 | #include <linux/delay.h> | ||
18 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
19 | 20 | ||
20 | #define DM_MSG_PREFIX "table" | 21 | #define DM_MSG_PREFIX "table" |
@@ -24,6 +25,19 @@ | |||
24 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) | 25 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) |
25 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) | 26 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) |
26 | 27 | ||
28 | /* | ||
29 | * The table has always exactly one reference from either mapped_device->map | ||
30 | * or hash_cell->new_map. This reference is not counted in table->holders. | ||
31 | * A pair of dm_create_table/dm_destroy_table functions is used for table | ||
32 | * creation/destruction. | ||
33 | * | ||
34 | * Temporary references from the other code increase table->holders. A pair | ||
35 | * of dm_table_get/dm_table_put functions is used to manipulate it. | ||
36 | * | ||
37 | * When the table is about to be destroyed, we wait for table->holders to | ||
38 | * drop to zero. | ||
39 | */ | ||
40 | |||
27 | struct dm_table { | 41 | struct dm_table { |
28 | struct mapped_device *md; | 42 | struct mapped_device *md; |
29 | atomic_t holders; | 43 | atomic_t holders; |
@@ -38,6 +52,8 @@ struct dm_table { | |||
38 | sector_t *highs; | 52 | sector_t *highs; |
39 | struct dm_target *targets; | 53 | struct dm_target *targets; |
40 | 54 | ||
55 | unsigned barriers_supported:1; | ||
56 | |||
41 | /* | 57 | /* |
42 | * Indicates the rw permissions for the new logical | 58 | * Indicates the rw permissions for the new logical |
43 | * device. This should be a combination of FMODE_READ | 59 | * device. This should be a combination of FMODE_READ |
@@ -226,7 +242,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode, | |||
226 | return -ENOMEM; | 242 | return -ENOMEM; |
227 | 243 | ||
228 | INIT_LIST_HEAD(&t->devices); | 244 | INIT_LIST_HEAD(&t->devices); |
229 | atomic_set(&t->holders, 1); | 245 | atomic_set(&t->holders, 0); |
246 | t->barriers_supported = 1; | ||
230 | 247 | ||
231 | if (!num_targets) | 248 | if (!num_targets) |
232 | num_targets = KEYS_PER_NODE; | 249 | num_targets = KEYS_PER_NODE; |
@@ -256,10 +273,14 @@ static void free_devices(struct list_head *devices) | |||
256 | } | 273 | } |
257 | } | 274 | } |
258 | 275 | ||
259 | static void table_destroy(struct dm_table *t) | 276 | void dm_table_destroy(struct dm_table *t) |
260 | { | 277 | { |
261 | unsigned int i; | 278 | unsigned int i; |
262 | 279 | ||
280 | while (atomic_read(&t->holders)) | ||
281 | msleep(1); | ||
282 | smp_mb(); | ||
283 | |||
263 | /* free the indexes (see dm_table_complete) */ | 284 | /* free the indexes (see dm_table_complete) */ |
264 | if (t->depth >= 2) | 285 | if (t->depth >= 2) |
265 | vfree(t->index[t->depth - 2]); | 286 | vfree(t->index[t->depth - 2]); |
@@ -297,8 +318,8 @@ void dm_table_put(struct dm_table *t) | |||
297 | if (!t) | 318 | if (!t) |
298 | return; | 319 | return; |
299 | 320 | ||
300 | if (atomic_dec_and_test(&t->holders)) | 321 | smp_mb__before_atomic_dec(); |
301 | table_destroy(t); | 322 | atomic_dec(&t->holders); |
302 | } | 323 | } |
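Seen from a holder of a temporary reference, the protocol from the comment block at the top of this file looks as follows — a sketch using dm_get_table(), which appears later in this patch in dm.c:

	static void inspect_table(struct mapped_device *md)
	{
		struct dm_table *map = dm_get_table(md);	/* holders++ */

		if (!map)
			return;

		/* ... the table cannot be destroyed while we hold it ... */

		dm_table_put(map);	/* holders--; dm_table_destroy()
					 * spins in msleep(1) until zero */
	}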
303 | 324 | ||
304 | /* | 325 | /* |
@@ -728,6 +749,10 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
728 | /* FIXME: the plan is to combine high here and then have | 749 | /* FIXME: the plan is to combine high here and then have |
729 | * the merge fn apply the target level restrictions. */ | 750 | * the merge fn apply the target level restrictions. */ |
730 | combine_restrictions_low(&t->limits, &tgt->limits); | 751 | combine_restrictions_low(&t->limits, &tgt->limits); |
752 | |||
753 | if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS)) | ||
754 | t->barriers_supported = 0; | ||
755 | |||
731 | return 0; | 756 | return 0; |
732 | 757 | ||
733 | bad: | 758 | bad: |
@@ -772,6 +797,12 @@ int dm_table_complete(struct dm_table *t) | |||
772 | 797 | ||
773 | check_for_valid_limits(&t->limits); | 798 | check_for_valid_limits(&t->limits); |
774 | 799 | ||
800 | /* | ||
801 | * We only support barriers if there is exactly one underlying device. | ||
802 | */ | ||
803 | if (!list_is_singular(&t->devices)) | ||
804 | t->barriers_supported = 0; | ||
805 | |||
775 | /* how many indexes will the btree have ? */ | 806 | /* how many indexes will the btree have ? */ |
776 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | 807 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); |
777 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | 808 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); |
@@ -986,6 +1017,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) | |||
986 | return t->md; | 1017 | return t->md; |
987 | } | 1018 | } |
988 | 1019 | ||
1020 | int dm_table_barrier_ok(struct dm_table *t) | ||
1021 | { | ||
1022 | return t->barriers_supported; | ||
1023 | } | ||
1024 | EXPORT_SYMBOL(dm_table_barrier_ok); | ||
1025 | |||
989 | EXPORT_SYMBOL(dm_vcalloc); | 1026 | EXPORT_SYMBOL(dm_vcalloc); |
990 | EXPORT_SYMBOL(dm_get_device); | 1027 | EXPORT_SYMBOL(dm_get_device); |
991 | EXPORT_SYMBOL(dm_put_device); | 1028 | EXPORT_SYMBOL(dm_put_device); |
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 835cf95b857f..7decf10006e4 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c | |||
@@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t) | |||
130 | return rv; | 130 | return rv; |
131 | } | 131 | } |
132 | 132 | ||
133 | int dm_unregister_target(struct target_type *t) | 133 | void dm_unregister_target(struct target_type *t) |
134 | { | 134 | { |
135 | struct tt_internal *ti; | 135 | struct tt_internal *ti; |
136 | 136 | ||
137 | down_write(&_lock); | 137 | down_write(&_lock); |
138 | if (!(ti = __find_target_type(t->name))) { | 138 | if (!(ti = __find_target_type(t->name))) { |
139 | up_write(&_lock); | 139 | DMCRIT("Unregistering unrecognised target: %s", t->name); |
140 | return -EINVAL; | 140 | BUG(); |
141 | } | 141 | } |
142 | 142 | ||
143 | if (ti->use) { | 143 | if (ti->use) { |
144 | up_write(&_lock); | 144 | DMCRIT("Attempt to unregister target still in use: %s", |
145 | return -ETXTBSY; | 145 | t->name); |
146 | BUG(); | ||
146 | } | 147 | } |
147 | 148 | ||
148 | list_del(&ti->list); | 149 | list_del(&ti->list); |
149 | kfree(ti); | 150 | kfree(ti); |
150 | 151 | ||
151 | up_write(&_lock); | 152 | up_write(&_lock); |
152 | return 0; | ||
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
@@ -187,8 +187,7 @@ int __init dm_target_init(void) | |||
187 | 187 | ||
188 | void dm_target_exit(void) | 188 | void dm_target_exit(void) |
189 | { | 189 | { |
190 | if (dm_unregister_target(&error_target)) | 190 | dm_unregister_target(&error_target); |
191 | DMWARN("error target unregistration failed"); | ||
192 | } | 191 | } |
193 | 192 | ||
194 | EXPORT_SYMBOL(dm_register_target); | 193 | EXPORT_SYMBOL(dm_register_target); |
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cdbf126ec106..bbc97030c0c2 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c | |||
@@ -69,10 +69,7 @@ static int __init dm_zero_init(void) | |||
69 | 69 | ||
70 | static void __exit dm_zero_exit(void) | 70 | static void __exit dm_zero_exit(void) |
71 | { | 71 | { |
72 | int r = dm_unregister_target(&zero_target); | 72 | dm_unregister_target(&zero_target); |
73 | |||
74 | if (r < 0) | ||
75 | DMERR("unregister failed %d", r); | ||
76 | } | 73 | } |
77 | 74 | ||
78 | module_init(dm_zero_init) | 75 | module_init(dm_zero_init) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 421c9f02d8ca..51ba1db4b3e7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
@@ -32,6 +32,7 @@ static unsigned int _major = 0; | |||
32 | 32 | ||
33 | static DEFINE_SPINLOCK(_minor_lock); | 33 | static DEFINE_SPINLOCK(_minor_lock); |
34 | /* | 34 | /* |
35 | * For bio-based dm. | ||
35 | * One of these is allocated per bio. | 36 | * One of these is allocated per bio. |
36 | */ | 37 | */ |
37 | struct dm_io { | 38 | struct dm_io { |
@@ -43,6 +44,7 @@ struct dm_io { | |||
43 | }; | 44 | }; |
44 | 45 | ||
45 | /* | 46 | /* |
47 | * For bio-based dm. | ||
46 | * One of these is allocated per target within a bio. Hopefully | 48 | * One of these is allocated per target within a bio. Hopefully |
47 | * this will be simplified out one day. | 49 | * this will be simplified out one day. |
48 | */ | 50 | */ |
@@ -54,6 +56,27 @@ struct dm_target_io { | |||
54 | 56 | ||
55 | DEFINE_TRACE(block_bio_complete); | 57 | DEFINE_TRACE(block_bio_complete); |
56 | 58 | ||
59 | /* | ||
60 | * For request-based dm. | ||
61 | * One of these is allocated per request. | ||
62 | */ | ||
63 | struct dm_rq_target_io { | ||
64 | struct mapped_device *md; | ||
65 | struct dm_target *ti; | ||
66 | struct request *orig, clone; | ||
67 | int error; | ||
68 | union map_info info; | ||
69 | }; | ||
70 | |||
71 | /* | ||
72 | * For request-based dm. | ||
73 | * One of these is allocated per bio. | ||
74 | */ | ||
75 | struct dm_rq_clone_bio_info { | ||
76 | struct bio *orig; | ||
77 | struct request *rq; | ||
78 | }; | ||
79 | |||
57 | union map_info *dm_get_mapinfo(struct bio *bio) | 80 | union map_info *dm_get_mapinfo(struct bio *bio) |
58 | { | 81 | { |
59 | if (bio && bio->bi_private) | 82 | if (bio && bio->bi_private) |
@@ -144,11 +167,16 @@ struct mapped_device { | |||
144 | 167 | ||
145 | /* forced geometry settings */ | 168 | /* forced geometry settings */ |
146 | struct hd_geometry geometry; | 169 | struct hd_geometry geometry; |
170 | |||
171 | /* sysfs handle */ | ||
172 | struct kobject kobj; | ||
147 | }; | 173 | }; |
148 | 174 | ||
149 | #define MIN_IOS 256 | 175 | #define MIN_IOS 256 |
150 | static struct kmem_cache *_io_cache; | 176 | static struct kmem_cache *_io_cache; |
151 | static struct kmem_cache *_tio_cache; | 177 | static struct kmem_cache *_tio_cache; |
178 | static struct kmem_cache *_rq_tio_cache; | ||
179 | static struct kmem_cache *_rq_bio_info_cache; | ||
152 | 180 | ||
153 | static int __init local_init(void) | 181 | static int __init local_init(void) |
154 | { | 182 | { |
@@ -164,9 +192,17 @@ static int __init local_init(void) | |||
164 | if (!_tio_cache) | 192 | if (!_tio_cache) |
165 | goto out_free_io_cache; | 193 | goto out_free_io_cache; |
166 | 194 | ||
195 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); | ||
196 | if (!_rq_tio_cache) | ||
197 | goto out_free_tio_cache; | ||
198 | |||
199 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); | ||
200 | if (!_rq_bio_info_cache) | ||
201 | goto out_free_rq_tio_cache; | ||
202 | |||
167 | r = dm_uevent_init(); | 203 | r = dm_uevent_init(); |
168 | if (r) | 204 | if (r) |
169 | goto out_free_tio_cache; | 205 | goto out_free_rq_bio_info_cache; |
170 | 206 | ||
171 | _major = major; | 207 | _major = major; |
172 | r = register_blkdev(_major, _name); | 208 | r = register_blkdev(_major, _name); |
@@ -180,6 +216,10 @@ static int __init local_init(void) | |||
180 | 216 | ||
181 | out_uevent_exit: | 217 | out_uevent_exit: |
182 | dm_uevent_exit(); | 218 | dm_uevent_exit(); |
219 | out_free_rq_bio_info_cache: | ||
220 | kmem_cache_destroy(_rq_bio_info_cache); | ||
221 | out_free_rq_tio_cache: | ||
222 | kmem_cache_destroy(_rq_tio_cache); | ||
183 | out_free_tio_cache: | 223 | out_free_tio_cache: |
184 | kmem_cache_destroy(_tio_cache); | 224 | kmem_cache_destroy(_tio_cache); |
185 | out_free_io_cache: | 225 | out_free_io_cache: |
@@ -190,6 +230,8 @@ out_free_io_cache: | |||
190 | 230 | ||
191 | static void local_exit(void) | 231 | static void local_exit(void) |
192 | { | 232 | { |
233 | kmem_cache_destroy(_rq_bio_info_cache); | ||
234 | kmem_cache_destroy(_rq_tio_cache); | ||
193 | kmem_cache_destroy(_tio_cache); | 235 | kmem_cache_destroy(_tio_cache); |
194 | kmem_cache_destroy(_io_cache); | 236 | kmem_cache_destroy(_io_cache); |
195 | unregister_blkdev(_major, _name); | 237 | unregister_blkdev(_major, _name); |
@@ -796,7 +838,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) | |||
796 | ci.map = dm_get_table(md); | 838 | ci.map = dm_get_table(md); |
797 | if (unlikely(!ci.map)) | 839 | if (unlikely(!ci.map)) |
798 | return -EIO; | 840 | return -EIO; |
799 | 841 | if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { | |
842 | dm_table_put(ci.map); | ||
843 | bio_endio(bio, -EOPNOTSUPP); | ||
844 | return 0; | ||
845 | } | ||
800 | ci.md = md; | 846 | ci.md = md; |
801 | ci.bio = bio; | 847 | ci.bio = bio; |
802 | ci.io = alloc_io(md); | 848 | ci.io = alloc_io(md); |
@@ -880,15 +926,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
880 | struct mapped_device *md = q->queuedata; | 926 | struct mapped_device *md = q->queuedata; |
881 | int cpu; | 927 | int cpu; |
882 | 928 | ||
883 | /* | ||
884 | * There is no use in forwarding any barrier request since we can't | ||
885 | * guarantee it is (or can be) handled by the targets correctly. | ||
886 | */ | ||
887 | if (unlikely(bio_barrier(bio))) { | ||
888 | bio_endio(bio, -EOPNOTSUPP); | ||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | down_read(&md->io_lock); | 929 | down_read(&md->io_lock); |
893 | 930 | ||
894 | cpu = part_stat_lock(); | 931 | cpu = part_stat_lock(); |
@@ -943,8 +980,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
943 | struct mapped_device *md = congested_data; | 980 | struct mapped_device *md = congested_data; |
944 | struct dm_table *map; | 981 | struct dm_table *map; |
945 | 982 | ||
946 | atomic_inc(&md->pending); | ||
947 | |||
948 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | 983 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { |
949 | map = dm_get_table(md); | 984 | map = dm_get_table(md); |
950 | if (map) { | 985 | if (map) { |
@@ -953,10 +988,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
953 | } | 988 | } |
954 | } | 989 | } |
955 | 990 | ||
956 | if (!atomic_dec_return(&md->pending)) | ||
957 | /* nudge anyone waiting on suspend queue */ | ||
958 | wake_up(&md->wait); | ||
959 | |||
960 | return r; | 991 | return r; |
961 | } | 992 | } |
962 | 993 | ||
@@ -1216,10 +1247,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1216 | 1247 | ||
1217 | if (md->suspended_bdev) | 1248 | if (md->suspended_bdev) |
1218 | __set_size(md, size); | 1249 | __set_size(md, size); |
1219 | if (size == 0) | 1250 | |
1251 | if (!size) { | ||
1252 | dm_table_destroy(t); | ||
1220 | return 0; | 1253 | return 0; |
1254 | } | ||
1221 | 1255 | ||
1222 | dm_table_get(t); | ||
1223 | dm_table_event_callback(t, event_callback, md); | 1256 | dm_table_event_callback(t, event_callback, md); |
1224 | 1257 | ||
1225 | write_lock(&md->map_lock); | 1258 | write_lock(&md->map_lock); |
@@ -1241,7 +1274,7 @@ static void __unbind(struct mapped_device *md) | |||
1241 | write_lock(&md->map_lock); | 1274 | write_lock(&md->map_lock); |
1242 | md->map = NULL; | 1275 | md->map = NULL; |
1243 | write_unlock(&md->map_lock); | 1276 | write_unlock(&md->map_lock); |
1244 | dm_table_put(map); | 1277 | dm_table_destroy(map); |
1245 | } | 1278 | } |
1246 | 1279 | ||
1247 | /* | 1280 | /* |
@@ -1255,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result) | |||
1255 | if (!md) | 1288 | if (!md) |
1256 | return -ENXIO; | 1289 | return -ENXIO; |
1257 | 1290 | ||
1291 | dm_sysfs_init(md); | ||
1292 | |||
1258 | *result = md; | 1293 | *result = md; |
1259 | return 0; | 1294 | return 0; |
1260 | } | 1295 | } |
@@ -1330,8 +1365,9 @@ void dm_put(struct mapped_device *md) | |||
1330 | dm_table_presuspend_targets(map); | 1365 | dm_table_presuspend_targets(map); |
1331 | dm_table_postsuspend_targets(map); | 1366 | dm_table_postsuspend_targets(map); |
1332 | } | 1367 | } |
1333 | __unbind(md); | 1368 | dm_sysfs_exit(md); |
1334 | dm_table_put(map); | 1369 | dm_table_put(map); |
1370 | __unbind(md); | ||
1335 | free_dev(md); | 1371 | free_dev(md); |
1336 | } | 1372 | } |
1337 | } | 1373 | } |
@@ -1669,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
1669 | return md->disk; | 1705 | return md->disk; |
1670 | } | 1706 | } |
1671 | 1707 | ||
1708 | struct kobject *dm_kobject(struct mapped_device *md) | ||
1709 | { | ||
1710 | return &md->kobj; | ||
1711 | } | ||
1712 | |||
1713 | /* | ||
1714 | * struct mapped_device should not be exported outside of dm.c, | ||
1715 | * so use this check to verify that kobj is part of the md structure | ||
1716 | */ | ||
1717 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | ||
1718 | { | ||
1719 | struct mapped_device *md; | ||
1720 | |||
1721 | md = container_of(kobj, struct mapped_device, kobj); | ||
1722 | if (&md->kobj != kobj) | ||
1723 | return NULL; | ||
1724 | |||
1725 | dm_get(md); | ||
1726 | return md; | ||
1727 | } | ||
1728 | |||
1672 | int dm_suspended(struct mapped_device *md) | 1729 | int dm_suspended(struct mapped_device *md) |
1673 | { | 1730 | { |
1674 | return test_bit(DMF_SUSPENDED, &md->flags); | 1731 | return test_bit(DMF_SUSPENDED, &md->flags); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 0ade60cdef42..20194e000c5a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -36,6 +36,7 @@ struct dm_table; | |||
36 | /*----------------------------------------------------------------- | 36 | /*----------------------------------------------------------------- |
37 | * Internal table functions. | 37 | * Internal table functions. |
38 | *---------------------------------------------------------------*/ | 38 | *---------------------------------------------------------------*/ |
39 | void dm_table_destroy(struct dm_table *t); | ||
39 | void dm_table_event_callback(struct dm_table *t, | 40 | void dm_table_event_callback(struct dm_table *t, |
40 | void (*fn)(void *), void *context); | 41 | void (*fn)(void *), void *context); |
41 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); | 42 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); |
@@ -51,6 +52,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits); | |||
51 | * To check the return value from dm_table_find_target(). | 52 | * To check the return value from dm_table_find_target(). |
52 | */ | 53 | */ |
53 | #define dm_target_is_valid(t) ((t)->table) | 54 | #define dm_target_is_valid(t) ((t)->table) |
55 | int dm_table_barrier_ok(struct dm_table *t); | ||
54 | 56 | ||
55 | /*----------------------------------------------------------------- | 57 | /*----------------------------------------------------------------- |
56 | * A registry of target types. | 58 | * A registry of target types. |
@@ -72,6 +74,14 @@ int dm_interface_init(void); | |||
72 | void dm_interface_exit(void); | 74 | void dm_interface_exit(void); |
73 | 75 | ||
74 | /* | 76 | /* |
77 | * sysfs interface | ||
78 | */ | ||
79 | int dm_sysfs_init(struct mapped_device *md); | ||
80 | void dm_sysfs_exit(struct mapped_device *md); | ||
81 | struct kobject *dm_kobject(struct mapped_device *md); | ||
82 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); | ||
83 | |||
84 | /* | ||
75 | * Targets for linear and striped mappings | 85 | * Targets for linear and striped mappings |
76 | */ | 86 | */ |
77 | int dm_linear_init(void); | 87 | int dm_linear_init(void); |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index c17fd334e574..8209e08969f9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
@@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 			  union map_info *map_context);
+typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
+				  union map_info *map_context);
 
 /*
  * Returns:
@@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 typedef int (*dm_endio_fn) (struct dm_target *ti,
 			    struct bio *bio, int error,
 			    union map_info *map_context);
+typedef int (*dm_request_endio_fn) (struct dm_target *ti,
+				    struct request *clone, int error,
+				    union map_info *map_context);
 
 typedef void (*dm_flush_fn) (struct dm_target *ti);
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
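
Together these two typedefs extend the target interface to request-based I/O ("dm request: extend target interface"): map_rq redirects an already-cloned request, rq_end_io sees its completion. A hypothetical pass-through target might implement them as below; the private structure, the remap logic, and the assumption that returning the error from the completion hook passes it through unchanged (by analogy with the bio-based end_io hook) are all illustrative:

	struct my_rq_target {			/* hypothetical per-target state */
		struct dm_dev *dev;
		atomic_t in_flight;		/* hypothetical congestion counter */
	};

	static int my_map_rq(struct dm_target *ti, struct request *clone,
			     union map_info *map_context)
	{
		struct my_rq_target *mt = ti->private;
		struct block_device *bdev = mt->dev->bdev;

		/* Point the clone at the underlying queue; dm dispatches it there. */
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;

		return DM_MAPIO_REMAPPED;
	}

	static int my_rq_end_io(struct dm_target *ti, struct request *clone,
				int error, union map_info *map_context)
	{
		/* Pass the completion status up unmodified. */
		return error;
	}
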
@@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
 			    struct bio_vec *biovec, int max_size);
 
+/*
+ * Returns:
+ *    0: The target can handle the next I/O immediately.
+ *    1: The target can't handle the next I/O immediately.
+ */
+typedef int (*dm_busy_fn) (struct dm_target *ti);
+
 void dm_error(const char *message);
 
 /*
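
The busy hook lets a request-based target push back on the dispatch loop, with the 0/1 contract documented in the comment above. Extending the earlier sketch, and using an invented in_flight counter and threshold purely for illustration:

	#define MY_CONGESTION_THRESHOLD 128	/* hypothetical limit */

	static int my_busy(struct dm_target *ti)
	{
		struct my_rq_target *mt = ti->private;

		/* 1 = hold the next request back for now, 0 = dispatch it. */
		return atomic_read(&mt->in_flight) >= MY_CONGESTION_THRESHOLD;
	}
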
@@ -100,14 +112,23 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 /*
  * Information about a target type
  */
+
+/*
+ * Target features
+ */
+#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001
+
 struct target_type {
+	uint64_t features;
 	const char *name;
 	struct module *module;
 	unsigned version[3];
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
+	dm_map_request_fn map_rq;
 	dm_endio_fn end_io;
+	dm_request_endio_fn rq_end_io;
 	dm_flush_fn flush;
 	dm_presuspend_fn presuspend;
 	dm_postsuspend_fn postsuspend;
@@ -117,6 +138,7 @@ struct target_type {
 	dm_message_fn message;
 	dm_ioctl_fn ioctl;
 	dm_merge_fn merge;
+	dm_busy_fn busy;
 };
 
 struct io_restrictions {
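
Tying the new target_type fields together, a hypothetical registration for the target sketched above; hooks a target does not provide are simply left NULL (a bio-based target fills .map/.end_io instead), and .features should only include DM_TARGET_SUPPORTS_BARRIERS if every code path genuinely preserves barrier semantics:

	static struct target_type my_rq_target_type = {
		.features  = DM_TARGET_SUPPORTS_BARRIERS,
		.name      = "my-rq",		/* hypothetical target name */
		.module    = THIS_MODULE,
		.version   = {1, 0, 0},
		.ctr       = my_ctr,		/* ctr/dtr not shown in this sketch */
		.dtr       = my_dtr,
		.map_rq    = my_map_rq,
		.rq_end_io = my_rq_end_io,
		.busy      = my_busy,
	};
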
@@ -157,8 +179,7 @@ struct dm_target {
 };
 
 int dm_register_target(struct target_type *t);
-int dm_unregister_target(struct target_type *t);
-
+void dm_unregister_target(struct target_type *t);
 
 /*-----------------------------------------------------------------
  * Functions for creating and manipulating mapped devices.
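
With dm_unregister_target() now returning void ("dm: consolidate target deregistration error handling"), callers no longer check a result they could not meaningfully act on; inconsistencies such as unregistering an unknown or still-in-use target are reported and treated as fatal in the core instead. Module exit paths shrink accordingly:

	static void __exit my_target_exit(void)
	{
		/* No error to propagate: failures are handled inside the core. */
		dm_unregister_target(&my_rq_target_type);
	}
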
@@ -276,6 +297,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
  *---------------------------------------------------------------*/
 #define DM_NAME "device-mapper"
 
+#define DMCRIT(f, arg...) \
+	printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
+
 #define DMERR(f, arg...) \
 	printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMERR_LIMIT(f, arg...) \
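
DMCRIT completes the severity ladder alongside DMERR/DMWARN/DMINFO, following the same convention: each dm source file defines DM_MSG_PREFIX before using the macros, and the prefix is spliced into the message at expansion time. A hypothetical use, with the message text invented for illustration:

	#define DM_MSG_PREFIX "core"

		DMCRIT("metadata corruption detected on %s", dm_device_name(md));
		/* logs at KERN_CRIT:
		 * "device-mapper: core: metadata corruption detected on <name>" */
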