 drivers/md/Makefile             |    5
 drivers/md/dm-crypt.c           |    6
 drivers/md/dm-delay.c           |    6
 drivers/md/dm-exception-store.c |  749
 drivers/md/dm-exception-store.h |  148
 drivers/md/dm-ioctl.c           |   16
 drivers/md/dm-linear.c          |    6
 drivers/md/dm-log.c             |   40
 drivers/md/dm-mpath.c           |   14
 drivers/md/dm-raid1.c           |   24
 drivers/md/dm-snap-persistent.c |  704
 drivers/md/dm-snap-transient.c  |   98
 drivers/md/dm-snap.c            |   48
 drivers/md/dm-snap.h            |  129
 drivers/md/dm-stripe.c          |    4
 drivers/md/dm-sysfs.c           |   99
 drivers/md/dm-table.c           |   47
 drivers/md/dm-target.c          |   15
 drivers/md/dm-zero.c            |    5
 drivers/md/dm.c                 |  101
 drivers/md/dm.h                 |   10
 include/linux/device-mapper.h   |   28
 22 files changed, 1319 insertions, 983 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 1c615804ea76..72880b7e28d9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,9 +3,10 @@
 #
 
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+		    dm-snap-persistent.o
 dm-mirror-objs	:= dm-raid1.o
 md-mod-objs	:= md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3326750ec02c..35bda49796fb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
 
 static void __exit dm_crypt_exit(void)
 {
-	int r = dm_unregister_target(&crypt_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
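The same cleanup recurs in dm-delay.c, dm-linear.c, dm-mpath.c and dm-raid1.c below: dm_unregister_target() no longer returns an error code (the dm-target.c hunk that makes it void is part of this commit's diffstat but not shown in this excerpt), so the per-target exit paths drop their "unregister failed" checks. A minimal sketch of the before/after shape, using a hypothetical example_target:

	/* Before: every caller checked a return code it could do nothing about. */
	static void __exit example_exit_old(void)
	{
		int r = dm_unregister_target(&example_target);

		if (r < 0)
			DMERR("unregister failed %d", r);
	}

	/* After: dm_unregister_target() returns void, presumably reporting any
	 * inconsistency itself, so module exit shrinks to a single call. */
	static void __exit example_exit_new(void)
	{
		dm_unregister_target(&example_target);
	}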
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 848b381f1173..59ee1b015d2d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -364,11 +364,7 @@ bad_queue:
 
 static void __exit dm_delay_exit(void)
 {
-	int r = dm_unregister_target(&delay_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&delay_target);
 	kmem_cache_destroy(delayed_cache);
 	destroy_workqueue(kdelayd_wq);
 }
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 01590f3e0009..dccbfb0e010f 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,756 +1,45 @@
 /*
- * dm-exception-store.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- * Copyright (C) 2006 Red Hat GmbH
+ * Copyright (C) 2006-2008 Red Hat GmbH
  *
  * This file is released under the GPL.
  */
 
-#include "dm-snap.h"
+#include "dm-exception-store.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
-
-#define DM_MSG_PREFIX "snapshots"
-#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
-
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
-
-/*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device.  The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store.  It makes sense therefore, to store the
- * metadata in chunk size blocks.
- *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable.  It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
- * constructor.
- *
- * The first chunk of the COW device just contains the header.
- * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
- *
- * All on disk structures are in little-endian format.  The end
- * of the exceptions info is indicated by an exception with a
- * new_chunk of 0, which is invalid since it would point to the
- * header chunk.
- */
-
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata.
- */
-#define SNAPSHOT_DISK_VERSION 1
-
-struct disk_header {
-	uint32_t magic;
-
-	/*
-	 * Is this snapshot valid.  There is no way of recovering
-	 * an invalid snapshot.
-	 */
-	uint32_t valid;
-
-	/*
-	 * Simple, incrementing version. no backward
-	 * compatibility.
-	 */
-	uint32_t version;
-
-	/* In sectors */
-	uint32_t chunk_size;
-};
-
-struct disk_exception {
-	uint64_t old_chunk;
-	uint64_t new_chunk;
-};
-
-struct commit_callback {
-	void (*callback)(void *, int success);
-	void *context;
-};
-
-/*
- * The top level structure for a persistent exception store.
- */
-struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
-	int version;
-	int valid;
-	uint32_t exceptions_per_area;
-
-	/*
-	 * Now that we have an asynchronous kcopyd there is no
-	 * need for large chunk sizes, so it wont hurt to have a
-	 * whole chunks worth of metadata in memory at once.
-	 */
-	void *area;
-
-	/*
-	 * An area of zeros used to clear the next area.
-	 */
-	void *zero_area;
-
-	/*
-	 * Used to keep track of which metadata area the data in
-	 * 'chunk' refers to.
-	 */
-	chunk_t current_area;
-
-	/*
-	 * The next free chunk for an exception.
-	 */
-	chunk_t next_free;
-
-	/*
-	 * The index of next free exception in the current
-	 * metadata area.
-	 */
-	uint32_t current_committed;
-
-	atomic_t pending_count;
-	uint32_t callback_count;
-	struct commit_callback *callbacks;
-	struct dm_io_client *io_client;
-
-	struct workqueue_struct *metadata_wq;
-};
-
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
-static int alloc_area(struct pstore *ps)
-{
-	int r = -ENOMEM;
-	size_t len;
-
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
-
-	/*
-	 * Allocate the chunk_size block of memory that will hold
-	 * a single metadata area.
-	 */
-	ps->area = vmalloc(len);
-	if (!ps->area)
-		return r;
-
-	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
-	memset(ps->zero_area, 0, len);
-
-	return 0;
-}
-
-static void free_area(struct pstore *ps)
-{
-	vfree(ps->area);
-	ps->area = NULL;
-	vfree(ps->zero_area);
-	ps->zero_area = NULL;
-}
-
-struct mdata_req {
-	struct dm_io_region *where;
-	struct dm_io_request *io_req;
-	struct work_struct work;
-	int result;
-};
-
-static void do_metadata(struct work_struct *work)
-{
-	struct mdata_req *req = container_of(work, struct mdata_req, work);
-
-	req->result = dm_io(req->io_req, 1, req->where, NULL);
-}
-
-/*
- * Read or write a chunk aligned and sized block of data from a device.
- */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = rw,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-	struct mdata_req req;
-
-	if (!metadata)
-		return dm_io(&io_req, 1, &where, NULL);
-
-	req.where = &where;
-	req.io_req = &io_req;
-
-	/*
-	 * Issue the synchronous I/O from a different thread
-	 * to avoid generic_make_request recursion.
-	 */
-	INIT_WORK(&req.work, do_metadata);
-	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
-
-	return req.result;
-}
-
-/*
- * Convert a metadata area index to a chunk index.
- */
-static chunk_t area_location(struct pstore *ps, chunk_t area)
-{
-	return 1 + ((ps->exceptions_per_area + 1) * area);
-}
-
-/*
- * Read or write a metadata area.  Remembering to skip the first
- * chunk which holds the header.
- */
-static int area_io(struct pstore *ps, int rw)
-{
-	int r;
-	chunk_t chunk;
-
-	chunk = area_location(ps, ps->current_area);
-
-	r = chunk_io(ps, chunk, rw, 0);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-static void zero_memory_area(struct pstore *ps)
-{
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-}
-
-static int zero_disk_area(struct pstore *ps, chunk_t area)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
-}
-
-static int read_header(struct pstore *ps, int *new_snapshot)
-{
-	int r;
-	struct disk_header *dh;
-	chunk_t chunk_size;
-	int chunk_size_supplied = 1;
-
-	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
-	 */
-	if (!ps->snap->chunk_size) {
-		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
-		chunk_size_supplied = 0;
-	}
-
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
-							     chunk_size));
-	if (IS_ERR(ps->io_client))
-		return PTR_ERR(ps->io_client);
-
-	r = alloc_area(ps);
-	if (r)
-		return r;
-
-	r = chunk_io(ps, 0, READ, 1);
-	if (r)
-		goto bad;
-
-	dh = (struct disk_header *) ps->area;
-
-	if (le32_to_cpu(dh->magic) == 0) {
-		*new_snapshot = 1;
-		return 0;
-	}
-
-	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
-		DMWARN("Invalid or corrupt snapshot");
-		r = -ENXIO;
-		goto bad;
-	}
-
-	*new_snapshot = 0;
-	ps->valid = le32_to_cpu(dh->valid);
-	ps->version = le32_to_cpu(dh->version);
-	chunk_size = le32_to_cpu(dh->chunk_size);
-
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
-		return 0;
-
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
-
-	/* We had a bogus chunk_size. Fix stuff up. */
-	free_area(ps);
-
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
-
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
-	r = alloc_area(ps);
-	return r;
-
-bad:
-	free_area(ps);
-	return r;
-}
-
-static int write_header(struct pstore *ps)
-{
-	struct disk_header *dh;
-
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-
-	dh = (struct disk_header *) ps->area;
-	dh->magic = cpu_to_le32(SNAP_MAGIC);
-	dh->valid = cpu_to_le32(ps->valid);
-	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
-
-	return chunk_io(ps, 0, WRITE, 1);
-}
-
-/*
- * Access functions for the disk exceptions, these do the endian conversions.
- */
-static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
-{
-	BUG_ON(index >= ps->exceptions_per_area);
-
-	return ((struct disk_exception *) ps->area) + index;
-}
 
-static void read_exception(struct pstore *ps,
-			   uint32_t index, struct disk_exception *result)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	result->old_chunk = le64_to_cpu(e->old_chunk);
-	result->new_chunk = le64_to_cpu(e->new_chunk);
-}
-
-static void write_exception(struct pstore *ps,
-			    uint32_t index, struct disk_exception *de)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	e->old_chunk = cpu_to_le64(de->old_chunk);
-	e->new_chunk = cpu_to_le64(de->new_chunk);
-}
+#define DM_MSG_PREFIX "snapshot exception stores"
 
-/*
- * Registers the exceptions that are present in the current area.
- * 'full' is filled in to indicate if the area has been
- * filled.
- */
-static int insert_exceptions(struct pstore *ps, int *full)
+int dm_exception_store_init(void)
 {
 	int r;
-	unsigned int i;
-	struct disk_exception de;
-
-	/* presume the area is full */
-	*full = 1;
-
-	for (i = 0; i < ps->exceptions_per_area; i++) {
-		read_exception(ps, i, &de);
-
-		/*
-		 * If the new_chunk is pointing at the start of
-		 * the COW device, where the first metadata area
-		 * is we know that we've hit the end of the
-		 * exceptions.  Therefore the area is not full.
-		 */
-		if (de.new_chunk == 0LL) {
-			ps->current_committed = i;
-			*full = 0;
-			break;
-		}
-
-		/*
-		 * Keep track of the start of the free chunks.
-		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
-
-		/*
-		 * Otherwise we add the exception to the snapshot.
-		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-static int read_exceptions(struct pstore *ps)
-{
-	int r, full = 1;
-
-	/*
-	 * Keeping reading chunks and inserting exceptions until
-	 * we find a partially full area.
-	 */
-	for (ps->current_area = 0; full; ps->current_area++) {
-		r = area_io(ps, READ);
-		if (r)
-			return r;
 
-		r = insert_exceptions(ps, &full);
-		if (r)
-			return r;
+	r = dm_transient_snapshot_init();
+	if (r) {
+		DMERR("Unable to register transient exception store type.");
+		goto transient_fail;
 	}
 
-	ps->current_area--;
-
-	return 0;
-}
-
-static struct pstore *get_info(struct exception_store *store)
-{
-	return (struct pstore *) store->context;
-}
-
-static void persistent_fraction_full(struct exception_store *store,
-				     sector_t *numerator, sector_t *denominator)
-{
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
-
-static void persistent_destroy(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
-	free_area(ps);
-	kfree(ps);
-}
-
-static int persistent_read_metadata(struct exception_store *store)
-{
-	int r, uninitialized_var(new_snapshot);
-	struct pstore *ps = get_info(store);
-
-	/*
-	 * Read the snapshot header.
-	 */
-	r = read_header(ps, &new_snapshot);
-	if (r)
-		return r;
-
-	/*
-	 * Now we know correct chunk_size, complete the initialisation.
-	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
-				  sizeof(struct disk_exception);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-			sizeof(*ps->callbacks));
-	if (!ps->callbacks)
-		return -ENOMEM;
-
-	/*
-	 * Do we need to setup a new snapshot ?
-	 */
-	if (new_snapshot) {
-		r = write_header(ps);
-		if (r) {
-			DMWARN("write_header failed");
-			return r;
-		}
-
-		ps->current_area = 0;
-		zero_memory_area(ps);
-		r = zero_disk_area(ps, 0);
-		if (r) {
-			DMWARN("zero_disk_area(0) failed");
-			return r;
-		}
-	} else {
-		/*
-		 * Sanity checks.
-		 */
-		if (ps->version != SNAPSHOT_DISK_VERSION) {
-			DMWARN("unable to handle snapshot disk version %d",
-			       ps->version);
-			return -EINVAL;
-		}
-
-		/*
-		 * Metadata are valid, but snapshot is invalidated
-		 */
-		if (!ps->valid)
-			return 1;
-
-		/*
-		 * Read the metadata.
-		 */
-		r = read_exceptions(ps);
-		if (r)
-			return r;
+	r = dm_persistent_snapshot_init();
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		goto persistent_fail;
 	}
 
 	return 0;
-}
-
-static int persistent_prepare(struct exception_store *store,
-			      struct dm_snap_exception *e)
-{
-	struct pstore *ps = get_info(store);
-	uint32_t stride;
-	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
-		return -ENOSPC;
 
-	e->new_chunk = ps->next_free;
-
-	/*
-	 * Move onto the next free pending, making sure to take
-	 * into account the location of the metadata chunks.
-	 */
-	stride = (ps->exceptions_per_area + 1);
-	next_free = ++ps->next_free;
-	if (sector_div(next_free, stride) == 1)
-		ps->next_free++;
-
-	atomic_inc(&ps->pending_count);
-	return 0;
-}
-
-static void persistent_commit(struct exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
-{
-	unsigned int i;
-	struct pstore *ps = get_info(store);
-	struct disk_exception de;
-	struct commit_callback *cb;
-
-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
-
-	/*
-	 * Add the callback to the back of the array.  This code
-	 * is the only place where the callback array is
-	 * manipulated, and we know that it will never be called
-	 * multiple times concurrently.
-	 */
-	cb = ps->callbacks + ps->callback_count++;
-	cb->callback = callback;
-	cb->context = callback_context;
-
-	/*
-	 * If there are exceptions in flight and we have not yet
-	 * filled this metadata area there's nothing more to do.
-	 */
-	if (!atomic_dec_and_test(&ps->pending_count) &&
-	    (ps->current_committed != ps->exceptions_per_area))
-		return;
-
-	/*
-	 * If we completely filled the current area, then wipe the next one.
-	 */
-	if ((ps->current_committed == ps->exceptions_per_area) &&
-	    zero_disk_area(ps, ps->current_area + 1))
-		ps->valid = 0;
-
-	/*
-	 * Commit exceptions to disk.
-	 */
-	if (ps->valid && area_io(ps, WRITE))
-		ps->valid = 0;
-
-	/*
-	 * Advance to the next area if this one is full.
-	 */
-	if (ps->current_committed == ps->exceptions_per_area) {
-		ps->current_committed = 0;
-		ps->current_area++;
-		zero_memory_area(ps);
-	}
-
-	for (i = 0; i < ps->callback_count; i++) {
-		cb = ps->callbacks + i;
-		cb->callback(cb->context, ps->valid);
-	}
-
-	ps->callback_count = 0;
-}
-
-static void persistent_drop(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	ps->valid = 0;
-	if (write_header(ps))
-		DMWARN("write header failed");
-}
-
-int dm_create_persistent(struct exception_store *store)
-{
-	struct pstore *ps;
-
-	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		return -ENOMEM;
-
-	ps->snap = store->snap;
-	ps->valid = 1;
-	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->area = NULL;
-	ps->next_free = 2;	/* skipping the header and first area */
-	ps->current_committed = 0;
-
-	ps->callback_count = 0;
-	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = NULL;
-
-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
-	if (!ps->metadata_wq) {
-		kfree(ps);
-		DMERR("couldn't start header metadata update thread");
-		return -ENOMEM;
-	}
-
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
-	store->fraction_full = persistent_fraction_full;
-	store->context = ps;
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the store for non-persistent snapshots.
- *---------------------------------------------------------------*/
-struct transient_c {
-	sector_t next_free;
-};
-
-static void transient_destroy(struct exception_store *store)
-{
-	kfree(store->context);
-}
-
-static int transient_read_metadata(struct exception_store *store)
-{
-	return 0;
-}
-
-static int transient_prepare(struct exception_store *store,
-			     struct dm_snap_exception *e)
-{
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	if (size < (tc->next_free + store->snap->chunk_size))
-		return -1;
-
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
-
-	return 0;
-}
-
-static void transient_commit(struct exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
-{
-	/* Just succeed */
-	callback(callback_context, 1);
-}
-
-static void transient_fraction_full(struct exception_store *store,
-				    sector_t *numerator, sector_t *denominator)
-{
-	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+persistent_fail:
+	dm_persistent_snapshot_exit();
+transient_fail:
+	return r;
 }
 
-int dm_create_transient(struct exception_store *store)
+void dm_exception_store_exit(void)
 {
-	struct transient_c *tc;
-
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
-	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->next_free = 0;
-	store->context = tc;
-
-	return 0;
+	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
 }
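After the split, dm-exception-store.c is reduced to registering and unregistering the two store implementations that now live in dm-snap-transient.c and dm-snap-persistent.c. A plausible caller-side sketch (hypothetical, since the dm-snap.c hunks are not shown in this excerpt) of how the snapshot module's init and exit paths would bracket the store lifetime:

	static int __init dm_snapshot_init(void)
	{
		int r;

		/* Assumption: the store types must be registered before any
		 * snapshot target that depends on them. */
		r = dm_exception_store_init();
		if (r)
			return r;

		/* ... register snapshot/origin targets, caches, kcopyd ... */

		return 0;
	}

	static void __exit dm_snapshot_exit(void)
	{
		/* ... unregister targets first ... */
		dm_exception_store_exit();
	}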
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
new file mode 100644
index 000000000000..bb9f33d5daa2
--- /dev/null
+++ b/drivers/md/dm-exception-store.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-mapper snapshot exception store.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_EXCEPTION_STORE
+#define _LINUX_DM_EXCEPTION_STORE
+
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 32k - 512k.
+ */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
+ * of chunks that follow contiguously. Remaining bits hold the number of the
+ * chunk within the device.
+ */
+struct dm_snap_exception {
+	struct list_head hash_list;
+
+	chunk_t old_chunk;
+	chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct dm_exception_store {
+	/*
+	 * Destroys this object when you've finished with it.
+	 */
+	void (*destroy) (struct dm_exception_store *store);
+
+	/*
+	 * The target shouldn't read the COW device until this is
+	 * called.  As exceptions are read from the COW, they are
+	 * reported back via the callback.
+	 */
+	int (*read_metadata) (struct dm_exception_store *store,
+			      int (*callback)(void *callback_context,
+					      chunk_t old, chunk_t new),
+			      void *callback_context);
+
+	/*
+	 * Find somewhere to store the next exception.
+	 */
+	int (*prepare_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e);
+
+	/*
+	 * Update the metadata with this exception.
+	 */
+	void (*commit_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e,
+				  void (*callback) (void *, int success),
+				  void *callback_context);
+
+	/*
+	 * The snapshot is invalid, note this in the metadata.
+	 */
+	void (*drop_snapshot) (struct dm_exception_store *store);
+
+	int (*status) (struct dm_exception_store *store, status_type_t status,
+		       char *result, unsigned int maxlen);
+
+	/*
+	 * Return how full the snapshot is.
+	 */
+	void (*fraction_full) (struct dm_exception_store *store,
+			       sector_t *numerator,
+			       sector_t *denominator);
+
+	struct dm_snapshot *snap;
+	void *context;
+};
+
+/*
+ * Funtions to manipulate consecutive chunks
+ */
+# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#  define DM_CHUNK_CONSECUTIVE_BITS 8
+#  define DM_CHUNK_NUMBER_BITS 56
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+	e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
+
+	BUG_ON(!dm_consecutive_chunk_count(e));
+}
+
+# else
+#  define DM_CHUNK_CONSECUTIVE_BITS 0
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk;
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return 0;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+}
+
+# endif
+
+int dm_exception_store_init(void);
+void dm_exception_store_exit(void);
+
+/*
+ * Two exception store implementations.
+ */
+int dm_persistent_snapshot_init(void);
+void dm_persistent_snapshot_exit(void);
+
+int dm_transient_snapshot_init(void);
+void dm_transient_snapshot_exit(void);
+
+int dm_create_persistent(struct dm_exception_store *store);
+
+int dm_create_transient(struct dm_exception_store *store);
+
+#endif /* _LINUX_DM_EXCEPTION_STORE */
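The helpers at the end of the new header pack a run length into the top DM_CHUNK_CONSECUTIVE_BITS of new_chunk, leaving the low DM_CHUNK_NUMBER_BITS for the chunk number itself. A standalone sketch of the encoding (userspace-style, with chunk_t stubbed to uint64_t purely for illustration):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t chunk_t;		/* stands in for sector_t */

	#define DM_CHUNK_NUMBER_BITS 56

	static chunk_t dm_chunk_number(chunk_t chunk)
	{
		return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
	}

	int main(void)
	{
		chunk_t new_chunk = 1000;	/* first chunk of a run */

		/* Two further contiguous chunks follow: bump the count twice,
		 * just as dm_consecutive_chunk_count_inc() would. */
		new_chunk += 1ULL << DM_CHUNK_NUMBER_BITS;
		new_chunk += 1ULL << DM_CHUNK_NUMBER_BITS;

		printf("number=%llu consecutive=%llu\n",
		       (unsigned long long)dm_chunk_number(new_chunk),
		       (unsigned long long)(new_chunk >> DM_CHUNK_NUMBER_BITS));
		/* prints: number=1000 consecutive=2 */
		return 0;
	}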
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 777c948180f9..54d0588fc1f6 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	dm_put(hc->md);
 	free_cell(hc);
 }
@@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
 
 		r = dm_swap_table(md, new_map);
 		if (r) {
+			dm_table_destroy(new_map);
 			dm_put(md);
-			dm_table_put(new_map);
 			return r;
 		}
 
@@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
 			set_disk_ro(dm_disk(md), 0);
 		else
 			set_disk_ro(dm_disk(md), 1);
-
-		dm_table_put(new_map);
 	}
 
 	if (dm_suspended(md))
@@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
@@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map) {
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 		hc->new_map = NULL;
 	}
 
@@ -1550,8 +1548,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 		goto out;
 	}
 
-	strcpy(name, hc->name);
-	strcpy(uuid, hc->uuid ? : "");
+	if (name)
+		strcpy(name, hc->name);
+	if (uuid)
+		strcpy(uuid, hc->uuid ? : "");
 
 out:
 	up_read(&_hash_lock);
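With the NULL checks added in the last hunk, dm_copy_name_and_uuid() callers can ask for just one of the two strings. A hedged sketch of such a call site (hypothetical, not part of this patch):

	char name[DM_NAME_LEN];

	/* Passing NULL for uuid now skips that copy instead of dereferencing
	 * a NULL buffer. */
	if (!dm_copy_name_and_uuid(md, name, NULL))
		DMINFO("resolved device name: %s", name);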
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 44042becad8a..bfa107f59d96 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,6 +142,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl  = linear_ioctl,
 	.merge  = linear_merge,
+	.features = DM_TARGET_SUPPORTS_BARRIERS,
 };
 
 int __init dm_linear_init(void)
@@ -156,8 +157,5 @@ int __init dm_linear_init(void)
 
 void dm_linear_exit(void)
 {
-	int r = dm_unregister_target(&linear_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&linear_target);
 }
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a8c0fc79ca78..737961f275c1 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
 static int rw_header(struct log_c *lc, int rw)
 {
 	lc->io_req.bi_rw = rw;
-	lc->io_req.mem.ptr.vma = lc->disk_header;
-	lc->io_req.notify.fn = NULL;
 
 	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -362,10 +360,15 @@ static int read_header(struct log_c *log)
 	return 0;
 }
 
-static inline int write_header(struct log_c *log)
+static int _check_region_size(struct dm_target *ti, uint32_t region_size)
 {
-	header_to_disk(&log->header, log->disk_header);
-	return rw_header(log, WRITE);
+	if (region_size < 2 || region_size > ti->len)
+		return 0;
+
+	if (!is_power_of_2(region_size))
+		return 0;
+
+	return 1;
 }
 
 /*----------------------------------------------------------------
@@ -403,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		}
 	}
 
-	if (sscanf(argv[0], "%u", &region_size) != 1) {
-		DMWARN("invalid region size string");
+	if (sscanf(argv[0], "%u", &region_size) != 1 ||
+	    !_check_region_size(ti, region_size)) {
+		DMWARN("invalid region size %s", argv[0]);
 		return -EINVAL;
 	}
 
@@ -453,8 +457,18 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
				       bitset_size, ti->limits.hardsect_size);
+
+		if (buf_size > dev->bdev->bd_inode->i_size) {
+			DMWARN("log device %s too small: need %llu bytes",
+			       dev->name, (unsigned long long)buf_size);
+			kfree(lc);
+			return -EINVAL;
+		}
+
 		lc->header_location.count = buf_size >> SECTOR_SHIFT;
+
 		lc->io_req.mem.type = DM_IO_VMA;
+		lc->io_req.notify.fn = NULL;
 		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
								   PAGE_SIZE));
 		if (IS_ERR(lc->io_req.client)) {
@@ -467,10 +481,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
 			DMWARN("couldn't allocate disk log buffer");
+			dm_io_client_destroy(lc->io_req.client);
 			kfree(lc);
 			return -ENOMEM;
 		}
 
+		lc->io_req.mem.ptr.vma = lc->disk_header;
 		lc->clean_bits = (void *)lc->disk_header +
				 (LOG_OFFSET << SECTOR_SHIFT);
 	}
@@ -482,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		DMWARN("couldn't allocate sync bitset");
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -495,6 +513,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		vfree(lc->sync_bits);
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -631,8 +651,10 @@ static int disk_resume(struct dm_dirty_log *log)
 	/* set the correct number of regions in the header */
 	lc->header.nr_regions = lc->region_count;
 
+	header_to_disk(&lc->header, lc->disk_header);
+
 	/* write the new header */
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r) {
 		DMWARN("%s: Failed to write header on dirty region log device",
 		       lc->log_dev->name);
@@ -682,7 +704,7 @@ static int disk_flush(struct dm_dirty_log *log)
 	if (!lc->touched)
 		return 0;
 
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r)
 		fail_log_device(lc);
 	else
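The dm-log hunks fold write_header() into rw_header() and move every invariant field of the embedded dm_io_request out of the per-call path: mem.type and notify.fn are set when the log context is created, mem.ptr.vma once disk_header has been allocated, and only bi_rw changes per request. The endian marshalling that write_header() used to do now happens explicitly at the one call site that actually changes header fields (disk_resume); disk_flush can write the unchanged header buffer as-is. A sketch of the resulting write path, with field names exactly as in the hunks above:

	/* Only needed when header fields have changed (as in disk_resume): */
	header_to_disk(&lc->header, lc->disk_header);	/* marshal to little-endian */

	r = rw_header(lc, WRITE);			/* was: write_header(lc) */
	if (r)
		fail_log_device(lc);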
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3d7f4923cd13..095f77bf9681 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
@@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 }
 
 /*
@@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 	}
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	return 0;
 }
 
@@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
 
 static void __exit dm_multipath_exit(void)
 {
-	int r;
-
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("target unregister failed %d", r);
+	dm_unregister_target(&multipath_target);
 	kmem_cache_destroy(_mpio_cache);
 }
 
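All four trigger_event submissions switch from the multipath-private kmultipathd queue to the shared kernel workqueue. A minimal sketch of the two idioms (the handler name is hypothetical; the work item itself is the existing m->trigger_event):

	static void trigger_event_fn(struct work_struct *work)
	{
		/* ... raise the device-mapper event ... */
	}

	INIT_WORK(&m->trigger_event, trigger_event_fn);

	queue_work(kmultipathd, &m->trigger_event);	/* old: private workqueue */
	schedule_work(&m->trigger_event);		/* new: shared kernel workqueue */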
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ec43f9fa4b2a..4d6bc101962e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	struct mirror_set *ms = m->ms;
 	struct mirror *new;
 
-	if (!errors_handled(ms))
-		return;
-
 	/*
 	 * error_count is used for nothing more than a
 	 * simple way to tell if a device has encountered
@@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	if (test_and_set_bit(error_type, &m->error_type))
 		return;
 
+	if (!errors_handled(ms))
+		return;
+
 	if (m != get_default_mirror(ms))
 		goto out;
 
@@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 	kfree(ms);
 }
 
-static inline int _check_region_size(struct dm_target *ti, uint32_t size)
-{
-	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
-		 size > ti->len);
-}
-
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
		      unsigned int mirror, char **argv)
 {
@@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
 		return NULL;
 	}
 
-	if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
-		ti->error = "Invalid region size";
-		dm_dirty_log_destroy(dl);
-		return NULL;
-	}
-
 	return dl;
 }
 
@@ -1300,11 +1288,7 @@ static int __init dm_mirror_init(void)
 
 static void __exit dm_mirror_exit(void)
 {
-	int r;
-
-	r = dm_unregister_target(&mirror_target);
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&mirror_target);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c new file mode 100644 index 000000000000..936b34e0959f --- /dev/null +++ b/drivers/md/dm-snap-persistent.c | |||
| @@ -0,0 +1,704 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
| 3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
| 4 | * | ||
| 5 | * This file is released under the GPL. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include "dm-exception-store.h" | ||
| 9 | #include "dm-snap.h" | ||
| 10 | |||
| 11 | #include <linux/mm.h> | ||
| 12 | #include <linux/pagemap.h> | ||
| 13 | #include <linux/vmalloc.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | #include <linux/dm-io.h> | ||
| 16 | |||
| 17 | #define DM_MSG_PREFIX "persistent snapshot" | ||
| 18 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ | ||
| 19 | |||
| 20 | /*----------------------------------------------------------------- | ||
| 21 | * Persistent snapshots, by persistent we mean that the snapshot | ||
| 22 | * will survive a reboot. | ||
| 23 | *---------------------------------------------------------------*/ | ||
| 24 | |||
| 25 | /* | ||
| 26 | * We need to store a record of which parts of the origin have | ||
| 27 | * been copied to the snapshot device. The snapshot code | ||
| 28 | * requires that we copy exception chunks to chunk aligned areas | ||
| 29 | * of the COW store. It makes sense therefore, to store the | ||
| 30 | * metadata in chunk size blocks. | ||
| 31 | * | ||
| 32 | * There is no backward or forward compatibility implemented, | ||
| 33 | * snapshots with different disk versions than the kernel will | ||
| 34 | * not be usable. It is expected that "lvcreate" will blank out | ||
| 35 | * the start of a fresh COW device before calling the snapshot | ||
| 36 | * constructor. | ||
| 37 | * | ||
| 38 | * The first chunk of the COW device just contains the header. | ||
| 39 | * After this there is a chunk filled with exception metadata, | ||
| 40 | * followed by as many exception chunks as can fit in the | ||
| 41 | * metadata areas. | ||
| 42 | * | ||
| 43 | * All on disk structures are in little-endian format. The end | ||
| 44 | * of the exceptions info is indicated by an exception with a | ||
| 45 | * new_chunk of 0, which is invalid since it would point to the | ||
| 46 | * header chunk. | ||
| 47 | */ | ||
| 48 | |||
| 49 | /* | ||
| 50 | * Magic for persistent snapshots: "SnAp" - Feeble isn't it. | ||
| 51 | */ | ||
| 52 | #define SNAP_MAGIC 0x70416e53 | ||
| 53 | |||
| 54 | /* | ||
| 55 | * The on-disk version of the metadata. | ||
| 56 | */ | ||
| 57 | #define SNAPSHOT_DISK_VERSION 1 | ||
| 58 | |||
| 59 | struct disk_header { | ||
| 60 | uint32_t magic; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Is this snapshot valid. There is no way of recovering | ||
| 64 | * an invalid snapshot. | ||
| 65 | */ | ||
| 66 | uint32_t valid; | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Simple, incrementing version. no backward | ||
| 70 | * compatibility. | ||
| 71 | */ | ||
| 72 | uint32_t version; | ||
| 73 | |||
| 74 | /* In sectors */ | ||
| 75 | uint32_t chunk_size; | ||
| 76 | }; | ||
| 77 | |||
| 78 | struct disk_exception { | ||
| 79 | uint64_t old_chunk; | ||
| 80 | uint64_t new_chunk; | ||
| 81 | }; | ||
| 82 | |||
| 83 | struct commit_callback { | ||
| 84 | void (*callback)(void *, int success); | ||
| 85 | void *context; | ||
| 86 | }; | ||
| 87 | |||
| 88 | /* | ||
| 89 | * The top level structure for a persistent exception store. | ||
| 90 | */ | ||
| 91 | struct pstore { | ||
| 92 | struct dm_snapshot *snap; /* up pointer to my snapshot */ | ||
| 93 | int version; | ||
| 94 | int valid; | ||
| 95 | uint32_t exceptions_per_area; | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Now that we have an asynchronous kcopyd there is no | ||
| 99 | * need for large chunk sizes, so it wont hurt to have a | ||
| 100 | * whole chunks worth of metadata in memory at once. | ||
| 101 | */ | ||
| 102 | void *area; | ||
| 103 | |||
| 104 | /* | ||
| 105 | * An area of zeros used to clear the next area. | ||
| 106 | */ | ||
| 107 | void *zero_area; | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Used to keep track of which metadata area the data in | ||
| 111 | * 'chunk' refers to. | ||
| 112 | */ | ||
| 113 | chunk_t current_area; | ||
| 114 | |||
| 115 | /* | ||
| 116 | * The next free chunk for an exception. | ||
| 117 | */ | ||
| 118 | chunk_t next_free; | ||
| 119 | |||
| 120 | /* | ||
| 121 | * The index of next free exception in the current | ||
| 122 | * metadata area. | ||
| 123 | */ | ||
| 124 | uint32_t current_committed; | ||
| 125 | |||
| 126 | atomic_t pending_count; | ||
| 127 | uint32_t callback_count; | ||
| 128 | struct commit_callback *callbacks; | ||
| 129 | struct dm_io_client *io_client; | ||
| 130 | |||
| 131 | struct workqueue_struct *metadata_wq; | ||
| 132 | }; | ||
| 133 | |||
| 134 | static unsigned sectors_to_pages(unsigned sectors) | ||
| 135 | { | ||
| 136 | return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); | ||
| 137 | } | ||
| 138 | |||
| 139 | static int alloc_area(struct pstore *ps) | ||
| 140 | { | ||
| 141 | int r = -ENOMEM; | ||
| 142 | size_t len; | ||
| 143 | |||
| 144 | len = ps->snap->chunk_size << SECTOR_SHIFT; | ||
| 145 | |||
| 146 | /* | ||
| 147 | * Allocate the chunk_size block of memory that will hold | ||
| 148 | * a single metadata area. | ||
| 149 | */ | ||
| 150 | ps->area = vmalloc(len); | ||
| 151 | if (!ps->area) | ||
| 152 | return r; | ||
| 153 | |||
| 154 | ps->zero_area = vmalloc(len); | ||
| 155 | if (!ps->zero_area) { | ||
| 156 | vfree(ps->area); | ||
| 157 | return r; | ||
| 158 | } | ||
| 159 | memset(ps->zero_area, 0, len); | ||
| 160 | |||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | |||
| 164 | static void free_area(struct pstore *ps) | ||
| 165 | { | ||
| 166 | vfree(ps->area); | ||
| 167 | ps->area = NULL; | ||
| 168 | vfree(ps->zero_area); | ||
| 169 | ps->zero_area = NULL; | ||
| 170 | } | ||
| 171 | |||
| 172 | struct mdata_req { | ||
| 173 | struct dm_io_region *where; | ||
| 174 | struct dm_io_request *io_req; | ||
| 175 | struct work_struct work; | ||
| 176 | int result; | ||
| 177 | }; | ||
| 178 | |||
| 179 | static void do_metadata(struct work_struct *work) | ||
| 180 | { | ||
| 181 | struct mdata_req *req = container_of(work, struct mdata_req, work); | ||
| 182 | |||
| 183 | req->result = dm_io(req->io_req, 1, req->where, NULL); | ||
| 184 | } | ||
| 185 | |||
| 186 | /* | ||
| 187 | * Read or write a chunk-aligned, chunk-sized block of data from a device. | ||
| 188 | */ | ||
| 189 | static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) | ||
| 190 | { | ||
| 191 | struct dm_io_region where = { | ||
| 192 | .bdev = ps->snap->cow->bdev, | ||
| 193 | .sector = ps->snap->chunk_size * chunk, | ||
| 194 | .count = ps->snap->chunk_size, | ||
| 195 | }; | ||
| 196 | struct dm_io_request io_req = { | ||
| 197 | .bi_rw = rw, | ||
| 198 | .mem.type = DM_IO_VMA, | ||
| 199 | .mem.ptr.vma = ps->area, | ||
| 200 | .client = ps->io_client, | ||
| 201 | .notify.fn = NULL, | ||
| 202 | }; | ||
| 203 | struct mdata_req req; | ||
| 204 | |||
| 205 | if (!metadata) | ||
| 206 | return dm_io(&io_req, 1, &where, NULL); | ||
| 207 | |||
| 208 | req.where = &where; | ||
| 209 | req.io_req = &io_req; | ||
| 210 | |||
| 211 | /* | ||
| 212 | * Issue the synchronous I/O from a different thread | ||
| 213 | * to avoid generic_make_request recursion. | ||
| 214 | */ | ||
| 215 | INIT_WORK(&req.work, do_metadata); | ||
| 216 | queue_work(ps->metadata_wq, &req.work); | ||
| 217 | flush_workqueue(ps->metadata_wq); | ||
| 218 | |||
| 219 | return req.result; | ||
| 220 | } | ||
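The metadata path above is an instance of a general pattern: run an otherwise synchronous operation on a workqueue thread (here, to avoid generic_make_request recursion) and collect its result through an on-stack work item. A generic sketch, with all names other than the workqueue API being illustrative:

```c
/* Sketch of the chunk_io() metadata pattern; assumes <linux/workqueue.h>. */
struct sync_req {
	struct work_struct work;
	int (*fn)(void *arg);
	void *arg;
	int result;
};

static void sync_req_fn(struct work_struct *work)
{
	struct sync_req *req = container_of(work, struct sync_req, work);

	req->result = req->fn(req->arg);
}

static int run_on_workqueue(struct workqueue_struct *wq,
			    int (*fn)(void *arg), void *arg)
{
	struct sync_req req = {
		.fn = fn,
		.arg = arg,
	};

	INIT_WORK(&req.work, sync_req_fn);
	queue_work(wq, &req.work);
	flush_workqueue(wq);	/* the on-stack req stays valid until here */

	return req.result;
}
```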
| 221 | |||
| 222 | /* | ||
| 223 | * Convert a metadata area index to a chunk index. | ||
| 224 | */ | ||
| 225 | static chunk_t area_location(struct pstore *ps, chunk_t area) | ||
| 226 | { | ||
| 227 | return 1 + ((ps->exceptions_per_area + 1) * area); | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * Read or write a metadata area, remembering to skip the first | ||
| 232 | * chunk, which holds the header. | ||
| 233 | */ | ||
| 234 | static int area_io(struct pstore *ps, int rw) | ||
| 235 | { | ||
| 236 | int r; | ||
| 237 | chunk_t chunk; | ||
| 238 | |||
| 239 | chunk = area_location(ps, ps->current_area); | ||
| 240 | |||
| 241 | r = chunk_io(ps, chunk, rw, 0); | ||
| 242 | if (r) | ||
| 243 | return r; | ||
| 244 | |||
| 245 | return 0; | ||
| 246 | } | ||
| 247 | |||
| 248 | static void zero_memory_area(struct pstore *ps) | ||
| 249 | { | ||
| 250 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
| 251 | } | ||
| 252 | |||
| 253 | static int zero_disk_area(struct pstore *ps, chunk_t area) | ||
| 254 | { | ||
| 255 | struct dm_io_region where = { | ||
| 256 | .bdev = ps->snap->cow->bdev, | ||
| 257 | .sector = ps->snap->chunk_size * area_location(ps, area), | ||
| 258 | .count = ps->snap->chunk_size, | ||
| 259 | }; | ||
| 260 | struct dm_io_request io_req = { | ||
| 261 | .bi_rw = WRITE, | ||
| 262 | .mem.type = DM_IO_VMA, | ||
| 263 | .mem.ptr.vma = ps->zero_area, | ||
| 264 | .client = ps->io_client, | ||
| 265 | .notify.fn = NULL, | ||
| 266 | }; | ||
| 267 | |||
| 268 | return dm_io(&io_req, 1, &where, NULL); | ||
| 269 | } | ||
| 270 | |||
| 271 | static int read_header(struct pstore *ps, int *new_snapshot) | ||
| 272 | { | ||
| 273 | int r; | ||
| 274 | struct disk_header *dh; | ||
| 275 | chunk_t chunk_size; | ||
| 276 | int chunk_size_supplied = 1; | ||
| 277 | |||
| 278 | /* | ||
| 279 | * Use default chunk size (or hardsect_size, if larger) if none supplied | ||
| 280 | */ | ||
| 281 | if (!ps->snap->chunk_size) { | ||
| 282 | ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, | ||
| 283 | bdev_hardsect_size(ps->snap->cow->bdev) >> 9); | ||
| 284 | ps->snap->chunk_mask = ps->snap->chunk_size - 1; | ||
| 285 | ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; | ||
| 286 | chunk_size_supplied = 0; | ||
| 287 | } | ||
| 288 | |||
| 289 | ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> | ||
| 290 | chunk_size)); | ||
| 291 | if (IS_ERR(ps->io_client)) | ||
| 292 | return PTR_ERR(ps->io_client); | ||
| 293 | |||
| 294 | r = alloc_area(ps); | ||
| 295 | if (r) | ||
| 296 | return r; | ||
| 297 | |||
| 298 | r = chunk_io(ps, 0, READ, 1); | ||
| 299 | if (r) | ||
| 300 | goto bad; | ||
| 301 | |||
| 302 | dh = (struct disk_header *) ps->area; | ||
| 303 | |||
| 304 | if (le32_to_cpu(dh->magic) == 0) { | ||
| 305 | *new_snapshot = 1; | ||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { | ||
| 310 | DMWARN("Invalid or corrupt snapshot"); | ||
| 311 | r = -ENXIO; | ||
| 312 | goto bad; | ||
| 313 | } | ||
| 314 | |||
| 315 | *new_snapshot = 0; | ||
| 316 | ps->valid = le32_to_cpu(dh->valid); | ||
| 317 | ps->version = le32_to_cpu(dh->version); | ||
| 318 | chunk_size = le32_to_cpu(dh->chunk_size); | ||
| 319 | |||
| 320 | if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) | ||
| 321 | return 0; | ||
| 322 | |||
| 323 | DMWARN("chunk size %llu in device metadata overrides " | ||
| 324 | "table chunk size of %llu.", | ||
| 325 | (unsigned long long)chunk_size, | ||
| 326 | (unsigned long long)ps->snap->chunk_size); | ||
| 327 | |||
| 328 | /* We had a bogus chunk_size. Fix stuff up. */ | ||
| 329 | free_area(ps); | ||
| 330 | |||
| 331 | ps->snap->chunk_size = chunk_size; | ||
| 332 | ps->snap->chunk_mask = chunk_size - 1; | ||
| 333 | ps->snap->chunk_shift = ffs(chunk_size) - 1; | ||
| 334 | |||
| 335 | r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), | ||
| 336 | ps->io_client); | ||
| 337 | if (r) | ||
| 338 | return r; | ||
| 339 | |||
| 340 | r = alloc_area(ps); | ||
| 341 | return r; | ||
| 342 | |||
| 343 | bad: | ||
| 344 | free_area(ps); | ||
| 345 | return r; | ||
| 346 | } | ||
| 347 | |||
| 348 | static int write_header(struct pstore *ps) | ||
| 349 | { | ||
| 350 | struct disk_header *dh; | ||
| 351 | |||
| 352 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
| 353 | |||
| 354 | dh = (struct disk_header *) ps->area; | ||
| 355 | dh->magic = cpu_to_le32(SNAP_MAGIC); | ||
| 356 | dh->valid = cpu_to_le32(ps->valid); | ||
| 357 | dh->version = cpu_to_le32(ps->version); | ||
| 358 | dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); | ||
| 359 | |||
| 360 | return chunk_io(ps, 0, WRITE, 1); | ||
| 361 | } | ||
| 362 | |||
| 363 | /* | ||
| 364 | * Access functions for the disk exceptions, these do the endian conversions. | ||
| 365 | */ | ||
| 366 | static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | ||
| 367 | { | ||
| 368 | BUG_ON(index >= ps->exceptions_per_area); | ||
| 369 | |||
| 370 | return ((struct disk_exception *) ps->area) + index; | ||
| 371 | } | ||
| 372 | |||
| 373 | static void read_exception(struct pstore *ps, | ||
| 374 | uint32_t index, struct disk_exception *result) | ||
| 375 | { | ||
| 376 | struct disk_exception *e = get_exception(ps, index); | ||
| 377 | |||
| 378 | /* copy it */ | ||
| 379 | result->old_chunk = le64_to_cpu(e->old_chunk); | ||
| 380 | result->new_chunk = le64_to_cpu(e->new_chunk); | ||
| 381 | } | ||
| 382 | |||
| 383 | static void write_exception(struct pstore *ps, | ||
| 384 | uint32_t index, struct disk_exception *de) | ||
| 385 | { | ||
| 386 | struct disk_exception *e = get_exception(ps, index); | ||
| 387 | |||
| 388 | /* copy it */ | ||
| 389 | e->old_chunk = cpu_to_le64(de->old_chunk); | ||
| 390 | e->new_chunk = cpu_to_le64(de->new_chunk); | ||
| 391 | } | ||
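A quick illustrative round trip (not part of the patch) showing why these accessors exist: the on-disk format is little-endian on every host, so values pass through cpu_to_le64()/le64_to_cpu() on the way out and back in.

```c
/* Illustrative only; mirrors write_exception()/read_exception(). */
static void endian_round_trip_demo(void)
{
	struct disk_exception mem = { .old_chunk = 5, .new_chunk = 259 };
	struct disk_exception disk;

	disk.old_chunk = cpu_to_le64(mem.old_chunk);	/* write side */
	disk.new_chunk = cpu_to_le64(mem.new_chunk);

	BUG_ON(le64_to_cpu(disk.old_chunk) != mem.old_chunk);	/* read side */
	BUG_ON(le64_to_cpu(disk.new_chunk) != mem.new_chunk);
}
```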
| 392 | |||
| 393 | /* | ||
| 394 | * Registers the exceptions that are present in the current area. | ||
| 395 | * 'full' is filled in to indicate if the area has been | ||
| 396 | * filled. | ||
| 397 | */ | ||
| 398 | static int insert_exceptions(struct pstore *ps, | ||
| 399 | int (*callback)(void *callback_context, | ||
| 400 | chunk_t old, chunk_t new), | ||
| 401 | void *callback_context, | ||
| 402 | int *full) | ||
| 403 | { | ||
| 404 | int r; | ||
| 405 | unsigned int i; | ||
| 406 | struct disk_exception de; | ||
| 407 | |||
| 408 | /* presume the area is full */ | ||
| 409 | *full = 1; | ||
| 410 | |||
| 411 | for (i = 0; i < ps->exceptions_per_area; i++) { | ||
| 412 | read_exception(ps, i, &de); | ||
| 413 | |||
| 414 | /* | ||
| 415 | * If the new_chunk is pointing at the start of | ||
| 416 | * the COW device, where the first metadata area | ||
| 417 | * is, we know that we've hit the end of the | ||
| 418 | * exceptions. Therefore the area is not full. | ||
| 419 | */ | ||
| 420 | if (de.new_chunk == 0LL) { | ||
| 421 | ps->current_committed = i; | ||
| 422 | *full = 0; | ||
| 423 | break; | ||
| 424 | } | ||
| 425 | |||
| 426 | /* | ||
| 427 | * Keep track of the start of the free chunks. | ||
| 428 | */ | ||
| 429 | if (ps->next_free <= de.new_chunk) | ||
| 430 | ps->next_free = de.new_chunk + 1; | ||
| 431 | |||
| 432 | /* | ||
| 433 | * Otherwise we add the exception to the snapshot. | ||
| 434 | */ | ||
| 435 | r = callback(callback_context, de.old_chunk, de.new_chunk); | ||
| 436 | if (r) | ||
| 437 | return r; | ||
| 438 | } | ||
| 439 | |||
| 440 | return 0; | ||
| 441 | } | ||
| 442 | |||
| 443 | static int read_exceptions(struct pstore *ps, | ||
| 444 | int (*callback)(void *callback_context, chunk_t old, | ||
| 445 | chunk_t new), | ||
| 446 | void *callback_context) | ||
| 447 | { | ||
| 448 | int r, full = 1; | ||
| 449 | |||
| 450 | /* | ||
| 451 | * Keep reading chunks and inserting exceptions until | ||
| 452 | * we find a partially full area. | ||
| 453 | */ | ||
| 454 | for (ps->current_area = 0; full; ps->current_area++) { | ||
| 455 | r = area_io(ps, READ); | ||
| 456 | if (r) | ||
| 457 | return r; | ||
| 458 | |||
| 459 | r = insert_exceptions(ps, callback, callback_context, &full); | ||
| 460 | if (r) | ||
| 461 | return r; | ||
| 462 | } | ||
| 463 | |||
| 464 | ps->current_area--; | ||
| 465 | |||
| 466 | return 0; | ||
| 467 | } | ||
| 468 | |||
| 469 | static struct pstore *get_info(struct dm_exception_store *store) | ||
| 470 | { | ||
| 471 | return (struct pstore *) store->context; | ||
| 472 | } | ||
| 473 | |||
| 474 | static void persistent_fraction_full(struct dm_exception_store *store, | ||
| 475 | sector_t *numerator, sector_t *denominator) | ||
| 476 | { | ||
| 477 | *numerator = get_info(store)->next_free * store->snap->chunk_size; | ||
| 478 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
| 479 | } | ||
| 480 | |||
| 481 | static void persistent_destroy(struct dm_exception_store *store) | ||
| 482 | { | ||
| 483 | struct pstore *ps = get_info(store); | ||
| 484 | |||
| 485 | destroy_workqueue(ps->metadata_wq); | ||
| 486 | dm_io_client_destroy(ps->io_client); | ||
| 487 | vfree(ps->callbacks); | ||
| 488 | free_area(ps); | ||
| 489 | kfree(ps); | ||
| 490 | } | ||
| 491 | |||
| 492 | static int persistent_read_metadata(struct dm_exception_store *store, | ||
| 493 | int (*callback)(void *callback_context, | ||
| 494 | chunk_t old, chunk_t new), | ||
| 495 | void *callback_context) | ||
| 496 | { | ||
| 497 | int r, uninitialized_var(new_snapshot); | ||
| 498 | struct pstore *ps = get_info(store); | ||
| 499 | |||
| 500 | /* | ||
| 501 | * Read the snapshot header. | ||
| 502 | */ | ||
| 503 | r = read_header(ps, &new_snapshot); | ||
| 504 | if (r) | ||
| 505 | return r; | ||
| 506 | |||
| 507 | /* | ||
| 508 | * Now we know correct chunk_size, complete the initialisation. | ||
| 509 | */ | ||
| 510 | ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / | ||
| 511 | sizeof(struct disk_exception); | ||
| 512 | ps->callbacks = dm_vcalloc(ps->exceptions_per_area, | ||
| 513 | sizeof(*ps->callbacks)); | ||
| 514 | if (!ps->callbacks) | ||
| 515 | return -ENOMEM; | ||
| 516 | |||
| 517 | /* | ||
| 518 | * Do we need to set up a new snapshot? | ||
| 519 | */ | ||
| 520 | if (new_snapshot) { | ||
| 521 | r = write_header(ps); | ||
| 522 | if (r) { | ||
| 523 | DMWARN("write_header failed"); | ||
| 524 | return r; | ||
| 525 | } | ||
| 526 | |||
| 527 | ps->current_area = 0; | ||
| 528 | zero_memory_area(ps); | ||
| 529 | r = zero_disk_area(ps, 0); | ||
| 530 | if (r) { | ||
| 531 | DMWARN("zero_disk_area(0) failed"); | ||
| 532 | return r; | ||
| 533 | } | ||
| 534 | } else { | ||
| 535 | /* | ||
| 536 | * Sanity checks. | ||
| 537 | */ | ||
| 538 | if (ps->version != SNAPSHOT_DISK_VERSION) { | ||
| 539 | DMWARN("unable to handle snapshot disk version %d", | ||
| 540 | ps->version); | ||
| 541 | return -EINVAL; | ||
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * The metadata is valid, but the snapshot is invalidated. | ||
| 546 | */ | ||
| 547 | if (!ps->valid) | ||
| 548 | return 1; | ||
| 549 | |||
| 550 | /* | ||
| 551 | * Read the metadata. | ||
| 552 | */ | ||
| 553 | r = read_exceptions(ps, callback, callback_context); | ||
| 554 | if (r) | ||
| 555 | return r; | ||
| 556 | } | ||
| 557 | |||
| 558 | return 0; | ||
| 559 | } | ||
| 560 | |||
| 561 | static int persistent_prepare_exception(struct dm_exception_store *store, | ||
| 562 | struct dm_snap_exception *e) | ||
| 563 | { | ||
| 564 | struct pstore *ps = get_info(store); | ||
| 565 | uint32_t stride; | ||
| 566 | chunk_t next_free; | ||
| 567 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
| 568 | |||
| 569 | /* Is there enough room? */ | ||
| 570 | if (size < ((ps->next_free + 1) * store->snap->chunk_size)) | ||
| 571 | return -ENOSPC; | ||
| 572 | |||
| 573 | e->new_chunk = ps->next_free; | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Move on to the next free chunk, making sure to take | ||
| 577 | * into account the location of the metadata chunks. | ||
| 578 | */ | ||
| 579 | stride = (ps->exceptions_per_area + 1); | ||
| 580 | next_free = ++ps->next_free; | ||
| 581 | if (sector_div(next_free, stride) == 1) | ||
| 582 | ps->next_free++; | ||
| 583 | |||
| 584 | atomic_inc(&ps->pending_count); | ||
| 585 | return 0; | ||
| 586 | } | ||
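The stride arithmetic above skips every chunk whose index is congruent to 1 modulo (exceptions_per_area + 1), since those are the metadata chunks (compare area_location()). A small standalone sketch, with an assumed exceptions_per_area of 256, that checks the allocator never hands out the header or a metadata chunk:

```c
/* Illustrative check of the next_free stride logic. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t stride = 256 + 1;	/* exceptions_per_area + 1 */
	uint64_t next_free = 2;		/* header and first metadata area skipped */
	int i;

	for (i = 0; i < 10000; i++) {
		assert(next_free != 0);			/* never the header */
		assert(next_free % stride != 1);	/* never a metadata chunk */

		if (++next_free % stride == 1)
			next_free++;	/* hop over the next metadata chunk */
	}
	return 0;
}
```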
| 587 | |||
| 588 | static void persistent_commit_exception(struct dm_exception_store *store, | ||
| 589 | struct dm_snap_exception *e, | ||
| 590 | void (*callback) (void *, int success), | ||
| 591 | void *callback_context) | ||
| 592 | { | ||
| 593 | unsigned int i; | ||
| 594 | struct pstore *ps = get_info(store); | ||
| 595 | struct disk_exception de; | ||
| 596 | struct commit_callback *cb; | ||
| 597 | |||
| 598 | de.old_chunk = e->old_chunk; | ||
| 599 | de.new_chunk = e->new_chunk; | ||
| 600 | write_exception(ps, ps->current_committed++, &de); | ||
| 601 | |||
| 602 | /* | ||
| 603 | * Add the callback to the back of the array. This code | ||
| 604 | * is the only place where the callback array is | ||
| 605 | * manipulated, and we know that it will never be called | ||
| 606 | * multiple times concurrently. | ||
| 607 | */ | ||
| 608 | cb = ps->callbacks + ps->callback_count++; | ||
| 609 | cb->callback = callback; | ||
| 610 | cb->context = callback_context; | ||
| 611 | |||
| 612 | /* | ||
| 613 | * If there are exceptions in flight and we have not yet | ||
| 614 | * filled this metadata area there's nothing more to do. | ||
| 615 | */ | ||
| 616 | if (!atomic_dec_and_test(&ps->pending_count) && | ||
| 617 | (ps->current_committed != ps->exceptions_per_area)) | ||
| 618 | return; | ||
| 619 | |||
| 620 | /* | ||
| 621 | * If we completely filled the current area, then wipe the next one. | ||
| 622 | */ | ||
| 623 | if ((ps->current_committed == ps->exceptions_per_area) && | ||
| 624 | zero_disk_area(ps, ps->current_area + 1)) | ||
| 625 | ps->valid = 0; | ||
| 626 | |||
| 627 | /* | ||
| 628 | * Commit exceptions to disk. | ||
| 629 | */ | ||
| 630 | if (ps->valid && area_io(ps, WRITE)) | ||
| 631 | ps->valid = 0; | ||
| 632 | |||
| 633 | /* | ||
| 634 | * Advance to the next area if this one is full. | ||
| 635 | */ | ||
| 636 | if (ps->current_committed == ps->exceptions_per_area) { | ||
| 637 | ps->current_committed = 0; | ||
| 638 | ps->current_area++; | ||
| 639 | zero_memory_area(ps); | ||
| 640 | } | ||
| 641 | |||
| 642 | for (i = 0; i < ps->callback_count; i++) { | ||
| 643 | cb = ps->callbacks + i; | ||
| 644 | cb->callback(cb->context, ps->valid); | ||
| 645 | } | ||
| 646 | |||
| 647 | ps->callback_count = 0; | ||
| 648 | } | ||
| 649 | |||
| 650 | static void persistent_drop_snapshot(struct dm_exception_store *store) | ||
| 651 | { | ||
| 652 | struct pstore *ps = get_info(store); | ||
| 653 | |||
| 654 | ps->valid = 0; | ||
| 655 | if (write_header(ps)) | ||
| 656 | DMWARN("write header failed"); | ||
| 657 | } | ||
| 658 | |||
| 659 | int dm_create_persistent(struct dm_exception_store *store) | ||
| 660 | { | ||
| 661 | struct pstore *ps; | ||
| 662 | |||
| 663 | /* allocate the pstore */ | ||
| 664 | ps = kmalloc(sizeof(*ps), GFP_KERNEL); | ||
| 665 | if (!ps) | ||
| 666 | return -ENOMEM; | ||
| 667 | |||
| 668 | ps->snap = store->snap; | ||
| 669 | ps->valid = 1; | ||
| 670 | ps->version = SNAPSHOT_DISK_VERSION; | ||
| 671 | ps->area = NULL; | ||
| 672 | ps->next_free = 2; /* skipping the header and first area */ | ||
| 673 | ps->current_committed = 0; | ||
| 674 | |||
| 675 | ps->callback_count = 0; | ||
| 676 | atomic_set(&ps->pending_count, 0); | ||
| 677 | ps->callbacks = NULL; | ||
| 678 | |||
| 679 | ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); | ||
| 680 | if (!ps->metadata_wq) { | ||
| 681 | kfree(ps); | ||
| 682 | DMERR("couldn't start header metadata update thread"); | ||
| 683 | return -ENOMEM; | ||
| 684 | } | ||
| 685 | |||
| 686 | store->destroy = persistent_destroy; | ||
| 687 | store->read_metadata = persistent_read_metadata; | ||
| 688 | store->prepare_exception = persistent_prepare_exception; | ||
| 689 | store->commit_exception = persistent_commit_exception; | ||
| 690 | store->drop_snapshot = persistent_drop_snapshot; | ||
| 691 | store->fraction_full = persistent_fraction_full; | ||
| 692 | store->context = ps; | ||
| 693 | |||
| 694 | return 0; | ||
| 695 | } | ||
| 696 | |||
| 697 | int dm_persistent_snapshot_init(void) | ||
| 698 | { | ||
| 699 | return 0; | ||
| 700 | } | ||
| 701 | |||
| 702 | void dm_persistent_snapshot_exit(void) | ||
| 703 | { | ||
| 704 | } | ||
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c new file mode 100644 index 000000000000..7f6e2e6dcb0d --- /dev/null +++ b/drivers/md/dm-snap-transient.c | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
| 3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
| 4 | * | ||
| 5 | * This file is released under the GPL. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include "dm-exception-store.h" | ||
| 9 | #include "dm-snap.h" | ||
| 10 | |||
| 11 | #include <linux/mm.h> | ||
| 12 | #include <linux/pagemap.h> | ||
| 13 | #include <linux/vmalloc.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | #include <linux/dm-io.h> | ||
| 16 | |||
| 17 | #define DM_MSG_PREFIX "transient snapshot" | ||
| 18 | |||
| 19 | /*----------------------------------------------------------------- | ||
| 20 | * Implementation of the store for non-persistent snapshots. | ||
| 21 | *---------------------------------------------------------------*/ | ||
| 22 | struct transient_c { | ||
| 23 | sector_t next_free; | ||
| 24 | }; | ||
| 25 | |||
| 26 | static void transient_destroy(struct dm_exception_store *store) | ||
| 27 | { | ||
| 28 | kfree(store->context); | ||
| 29 | } | ||
| 30 | |||
| 31 | static int transient_read_metadata(struct dm_exception_store *store, | ||
| 32 | int (*callback)(void *callback_context, | ||
| 33 | chunk_t old, chunk_t new), | ||
| 34 | void *callback_context) | ||
| 35 | { | ||
| 36 | return 0; | ||
| 37 | } | ||
| 38 | |||
| 39 | static int transient_prepare_exception(struct dm_exception_store *store, | ||
| 40 | struct dm_snap_exception *e) | ||
| 41 | { | ||
| 42 | struct transient_c *tc = (struct transient_c *) store->context; | ||
| 43 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
| 44 | |||
| 45 | if (size < (tc->next_free + store->snap->chunk_size)) | ||
| 46 | return -1; | ||
| 47 | |||
| 48 | e->new_chunk = sector_to_chunk(store->snap, tc->next_free); | ||
| 49 | tc->next_free += store->snap->chunk_size; | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | static void transient_commit_exception(struct dm_exception_store *store, | ||
| 55 | struct dm_snap_exception *e, | ||
| 56 | void (*callback) (void *, int success), | ||
| 57 | void *callback_context) | ||
| 58 | { | ||
| 59 | /* Just succeed */ | ||
| 60 | callback(callback_context, 1); | ||
| 61 | } | ||
| 62 | |||
| 63 | static void transient_fraction_full(struct dm_exception_store *store, | ||
| 64 | sector_t *numerator, sector_t *denominator) | ||
| 65 | { | ||
| 66 | *numerator = ((struct transient_c *) store->context)->next_free; | ||
| 67 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
| 68 | } | ||
| 69 | |||
| 70 | int dm_create_transient(struct dm_exception_store *store) | ||
| 71 | { | ||
| 72 | struct transient_c *tc; | ||
| 73 | |||
| 74 | store->destroy = transient_destroy; | ||
| 75 | store->read_metadata = transient_read_metadata; | ||
| 76 | store->prepare_exception = transient_prepare_exception; | ||
| 77 | store->commit_exception = transient_commit_exception; | ||
| 78 | store->drop_snapshot = NULL; | ||
| 79 | store->fraction_full = transient_fraction_full; | ||
| 80 | |||
| 81 | tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); | ||
| 82 | if (!tc) | ||
| 83 | return -ENOMEM; | ||
| 84 | |||
| 85 | tc->next_free = 0; | ||
| 86 | store->context = tc; | ||
| 87 | |||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | int dm_transient_snapshot_init(void) | ||
| 92 | { | ||
| 93 | return 0; | ||
| 94 | } | ||
| 95 | |||
| 96 | void dm_transient_snapshot_exit(void) | ||
| 97 | { | ||
| 98 | } | ||
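For orientation, this is roughly how dm-snap.c drives either store implementation through the shared function pointers; the helper below is an illustrative sketch with error handling elided, not code from the patch:

```c
/* Hedged sketch of a caller using the dm_exception_store ops. */
static void demo_commit_done(void *context, int success)
{
	/* release or fail the bios waiting on this exception */
}

static int demo_handle_write(struct dm_snapshot *s,
			     struct dm_snap_exception *e)
{
	int r;

	r = s->store.prepare_exception(&s->store, e);
	if (r)
		return r;	/* e.g. -ENOSPC: the COW device is full */

	/* ... kcopyd copies the chunk; on completion: ... */
	s->store.commit_exception(&s->store, e, demo_commit_done, s);

	return 0;
}
```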
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c96db26b87c..65ff82ff124e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
| 10 | #include <linux/ctype.h> | 10 | #include <linux/ctype.h> |
| 11 | #include <linux/device-mapper.h> | 11 | #include <linux/device-mapper.h> |
| 12 | #include <linux/delay.h> | ||
| 12 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 14 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
| @@ -20,6 +21,7 @@ | |||
| 20 | #include <linux/log2.h> | 21 | #include <linux/log2.h> |
| 21 | #include <linux/dm-kcopyd.h> | 22 | #include <linux/dm-kcopyd.h> |
| 22 | 23 | ||
| 24 | #include "dm-exception-store.h" | ||
| 23 | #include "dm-snap.h" | 25 | #include "dm-snap.h" |
| 24 | #include "dm-bio-list.h" | 26 | #include "dm-bio-list.h" |
| 25 | 27 | ||
| @@ -428,8 +430,13 @@ out: | |||
| 428 | list_add(&new_e->hash_list, e ? &e->hash_list : l); | 430 | list_add(&new_e->hash_list, e ? &e->hash_list : l); |
| 429 | } | 431 | } |
| 430 | 432 | ||
| 431 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) | 433 | /* |
| 434 | * Callback used by the exception stores to load exceptions when | ||
| 435 | * initialising. | ||
| 436 | */ | ||
| 437 | static int dm_add_exception(void *context, chunk_t old, chunk_t new) | ||
| 432 | { | 438 | { |
| 439 | struct dm_snapshot *s = context; | ||
| 433 | struct dm_snap_exception *e; | 440 | struct dm_snap_exception *e; |
| 434 | 441 | ||
| 435 | e = alloc_exception(); | 442 | e = alloc_exception(); |
| @@ -658,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
| 658 | spin_lock_init(&s->tracked_chunk_lock); | 665 | spin_lock_init(&s->tracked_chunk_lock); |
| 659 | 666 | ||
| 660 | /* Metadata must only be loaded into one table at once */ | 667 | /* Metadata must only be loaded into one table at once */ |
| 661 | r = s->store.read_metadata(&s->store); | 668 | r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); |
| 662 | if (r < 0) { | 669 | if (r < 0) { |
| 663 | ti->error = "Failed to read snapshot metadata"; | 670 | ti->error = "Failed to read snapshot metadata"; |
| 664 | goto bad_load_and_register; | 671 | goto bad_load_and_register; |
| @@ -735,7 +742,7 @@ static void snapshot_dtr(struct dm_target *ti) | |||
| 735 | unregister_snapshot(s); | 742 | unregister_snapshot(s); |
| 736 | 743 | ||
| 737 | while (atomic_read(&s->pending_exceptions_count)) | 744 | while (atomic_read(&s->pending_exceptions_count)) |
| 738 | yield(); | 745 | msleep(1); |
| 739 | /* | 746 | /* |
| 740 | * Ensure instructions in mempool_destroy aren't reordered | 747 | * Ensure instructions in mempool_destroy aren't reordered |
| 741 | * before atomic_read. | 748 | * before atomic_read. |
| @@ -888,10 +895,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
| 888 | 895 | ||
| 889 | /* | 896 | /* |
| 890 | * Check for conflicting reads. This is extremely improbable, | 897 | * Check for conflicting reads. This is extremely improbable, |
| 891 | * so yield() is sufficient and there is no need for a wait queue. | 898 | * so msleep(1) is sufficient and there is no need for a wait queue. |
| 892 | */ | 899 | */ |
| 893 | while (__chunk_is_tracked(s, pe->e.old_chunk)) | 900 | while (__chunk_is_tracked(s, pe->e.old_chunk)) |
| 894 | yield(); | 901 | msleep(1); |
| 895 | 902 | ||
| 896 | /* | 903 | /* |
| 897 | * Add a proper exception, and remove the | 904 | * Add a proper exception, and remove the |
| @@ -1404,6 +1411,12 @@ static int __init dm_snapshot_init(void) | |||
| 1404 | { | 1411 | { |
| 1405 | int r; | 1412 | int r; |
| 1406 | 1413 | ||
| 1414 | r = dm_exception_store_init(); | ||
| 1415 | if (r) { | ||
| 1416 | DMERR("Failed to initialize exception stores"); | ||
| 1417 | return r; | ||
| 1418 | } | ||
| 1419 | |||
| 1407 | r = dm_register_target(&snapshot_target); | 1420 | r = dm_register_target(&snapshot_target); |
| 1408 | if (r) { | 1421 | if (r) { |
| 1409 | DMERR("snapshot target register failed %d", r); | 1422 | DMERR("snapshot target register failed %d", r); |
| @@ -1452,39 +1465,34 @@ static int __init dm_snapshot_init(void) | |||
| 1452 | 1465 | ||
| 1453 | return 0; | 1466 | return 0; |
| 1454 | 1467 | ||
| 1455 | bad_pending_pool: | 1468 | bad_pending_pool: |
| 1456 | kmem_cache_destroy(tracked_chunk_cache); | 1469 | kmem_cache_destroy(tracked_chunk_cache); |
| 1457 | bad5: | 1470 | bad5: |
| 1458 | kmem_cache_destroy(pending_cache); | 1471 | kmem_cache_destroy(pending_cache); |
| 1459 | bad4: | 1472 | bad4: |
| 1460 | kmem_cache_destroy(exception_cache); | 1473 | kmem_cache_destroy(exception_cache); |
| 1461 | bad3: | 1474 | bad3: |
| 1462 | exit_origin_hash(); | 1475 | exit_origin_hash(); |
| 1463 | bad2: | 1476 | bad2: |
| 1464 | dm_unregister_target(&origin_target); | 1477 | dm_unregister_target(&origin_target); |
| 1465 | bad1: | 1478 | bad1: |
| 1466 | dm_unregister_target(&snapshot_target); | 1479 | dm_unregister_target(&snapshot_target); |
| 1467 | return r; | 1480 | return r; |
| 1468 | } | 1481 | } |
| 1469 | 1482 | ||
| 1470 | static void __exit dm_snapshot_exit(void) | 1483 | static void __exit dm_snapshot_exit(void) |
| 1471 | { | 1484 | { |
| 1472 | int r; | ||
| 1473 | |||
| 1474 | destroy_workqueue(ksnapd); | 1485 | destroy_workqueue(ksnapd); |
| 1475 | 1486 | ||
| 1476 | r = dm_unregister_target(&snapshot_target); | 1487 | dm_unregister_target(&snapshot_target); |
| 1477 | if (r) | 1488 | dm_unregister_target(&origin_target); |
| 1478 | DMERR("snapshot unregister failed %d", r); | ||
| 1479 | |||
| 1480 | r = dm_unregister_target(&origin_target); | ||
| 1481 | if (r) | ||
| 1482 | DMERR("origin unregister failed %d", r); | ||
| 1483 | 1489 | ||
| 1484 | exit_origin_hash(); | 1490 | exit_origin_hash(); |
| 1485 | kmem_cache_destroy(pending_cache); | 1491 | kmem_cache_destroy(pending_cache); |
| 1486 | kmem_cache_destroy(exception_cache); | 1492 | kmem_cache_destroy(exception_cache); |
| 1487 | kmem_cache_destroy(tracked_chunk_cache); | 1493 | kmem_cache_destroy(tracked_chunk_cache); |
| 1494 | |||
| 1495 | dm_exception_store_exit(); | ||
| 1488 | } | 1496 | } |
| 1489 | 1497 | ||
| 1490 | /* Module hooks */ | 1498 | /* Module hooks */ |
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 99c0106ede2d..d9e62b43cf85 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h | |||
| @@ -1,6 +1,4 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * dm-snapshot.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
| 5 | * | 3 | * |
| 6 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
| @@ -10,6 +8,7 @@ | |||
| 10 | #define DM_SNAPSHOT_H | 8 | #define DM_SNAPSHOT_H |
| 11 | 9 | ||
| 12 | #include <linux/device-mapper.h> | 10 | #include <linux/device-mapper.h> |
| 11 | #include "dm-exception-store.h" | ||
| 13 | #include "dm-bio-list.h" | 12 | #include "dm-bio-list.h" |
| 14 | #include <linux/blkdev.h> | 13 | #include <linux/blkdev.h> |
| 15 | #include <linux/workqueue.h> | 14 | #include <linux/workqueue.h> |
| @@ -20,116 +19,6 @@ struct exception_table { | |||
| 20 | struct list_head *table; | 19 | struct list_head *table; |
| 21 | }; | 20 | }; |
| 22 | 21 | ||
| 23 | /* | ||
| 24 | * The snapshot code deals with largish chunks of the disk at a | ||
| 25 | * time. Typically 32k - 512k. | ||
| 26 | */ | ||
| 27 | typedef sector_t chunk_t; | ||
| 28 | |||
| 29 | /* | ||
| 30 | * An exception is used where an old chunk of data has been | ||
| 31 | * replaced by a new one. | ||
| 32 | * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number | ||
| 33 | * of chunks that follow contiguously. Remaining bits hold the number of the | ||
| 34 | * chunk within the device. | ||
| 35 | */ | ||
| 36 | struct dm_snap_exception { | ||
| 37 | struct list_head hash_list; | ||
| 38 | |||
| 39 | chunk_t old_chunk; | ||
| 40 | chunk_t new_chunk; | ||
| 41 | }; | ||
| 42 | |||
| 43 | /* | ||
| 44 | * Funtions to manipulate consecutive chunks | ||
| 45 | */ | ||
| 46 | # if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) | ||
| 47 | # define DM_CHUNK_CONSECUTIVE_BITS 8 | ||
| 48 | # define DM_CHUNK_NUMBER_BITS 56 | ||
| 49 | |||
| 50 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
| 51 | { | ||
| 52 | return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
| 56 | { | ||
| 57 | return e->new_chunk >> DM_CHUNK_NUMBER_BITS; | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
| 61 | { | ||
| 62 | e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); | ||
| 63 | |||
| 64 | BUG_ON(!dm_consecutive_chunk_count(e)); | ||
| 65 | } | ||
| 66 | |||
| 67 | # else | ||
| 68 | # define DM_CHUNK_CONSECUTIVE_BITS 0 | ||
| 69 | |||
| 70 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
| 71 | { | ||
| 72 | return chunk; | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
| 76 | { | ||
| 77 | return 0; | ||
| 78 | } | ||
| 79 | |||
| 80 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
| 81 | { | ||
| 82 | } | ||
| 83 | |||
| 84 | # endif | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Abstraction to handle the meta/layout of exception stores (the | ||
| 88 | * COW device). | ||
| 89 | */ | ||
| 90 | struct exception_store { | ||
| 91 | |||
| 92 | /* | ||
| 93 | * Destroys this object when you've finished with it. | ||
| 94 | */ | ||
| 95 | void (*destroy) (struct exception_store *store); | ||
| 96 | |||
| 97 | /* | ||
| 98 | * The target shouldn't read the COW device until this is | ||
| 99 | * called. | ||
| 100 | */ | ||
| 101 | int (*read_metadata) (struct exception_store *store); | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Find somewhere to store the next exception. | ||
| 105 | */ | ||
| 106 | int (*prepare_exception) (struct exception_store *store, | ||
| 107 | struct dm_snap_exception *e); | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Update the metadata with this exception. | ||
| 111 | */ | ||
| 112 | void (*commit_exception) (struct exception_store *store, | ||
| 113 | struct dm_snap_exception *e, | ||
| 114 | void (*callback) (void *, int success), | ||
| 115 | void *callback_context); | ||
| 116 | |||
| 117 | /* | ||
| 118 | * The snapshot is invalid, note this in the metadata. | ||
| 119 | */ | ||
| 120 | void (*drop_snapshot) (struct exception_store *store); | ||
| 121 | |||
| 122 | /* | ||
| 123 | * Return how full the snapshot is. | ||
| 124 | */ | ||
| 125 | void (*fraction_full) (struct exception_store *store, | ||
| 126 | sector_t *numerator, | ||
| 127 | sector_t *denominator); | ||
| 128 | |||
| 129 | struct dm_snapshot *snap; | ||
| 130 | void *context; | ||
| 131 | }; | ||
| 132 | |||
| 133 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 | 22 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 |
| 134 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ | 23 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ |
| 135 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) | 24 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) |
| @@ -172,7 +61,7 @@ struct dm_snapshot { | |||
| 172 | spinlock_t pe_lock; | 61 | spinlock_t pe_lock; |
| 173 | 62 | ||
| 174 | /* The on disk metadata handler */ | 63 | /* The on disk metadata handler */ |
| 175 | struct exception_store store; | 64 | struct dm_exception_store store; |
| 176 | 65 | ||
| 177 | struct dm_kcopyd_client *kcopyd_client; | 66 | struct dm_kcopyd_client *kcopyd_client; |
| 178 | 67 | ||
| @@ -187,20 +76,6 @@ struct dm_snapshot { | |||
| 187 | }; | 76 | }; |
| 188 | 77 | ||
| 189 | /* | 78 | /* |
| 190 | * Used by the exception stores to load exceptions when | ||
| 191 | * initialising. | ||
| 192 | */ | ||
| 193 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); | ||
| 194 | |||
| 195 | /* | ||
| 196 | * Constructor and destructor for the default persistent | ||
| 197 | * store. | ||
| 198 | */ | ||
| 199 | int dm_create_persistent(struct exception_store *store); | ||
| 200 | |||
| 201 | int dm_create_transient(struct exception_store *store); | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Return the number of sectors in the device. | 79 | * Return the number of sectors in the device. |
| 205 | */ | 80 | */ |
| 206 | static inline sector_t get_dev_size(struct block_device *bdev) | 81 | static inline sector_t get_dev_size(struct block_device *bdev) |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e4ef88d421e..41569bc60abc 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
| @@ -337,9 +337,7 @@ int __init dm_stripe_init(void) | |||
| 337 | 337 | ||
| 338 | void dm_stripe_exit(void) | 338 | void dm_stripe_exit(void) |
| 339 | { | 339 | { |
| 340 | if (dm_unregister_target(&stripe_target)) | 340 | dm_unregister_target(&stripe_target); |
| 341 | DMWARN("target unregistration failed"); | ||
| 342 | |||
| 343 | destroy_workqueue(kstriped); | 341 | destroy_workqueue(kstriped); |
| 344 | 342 | ||
| 345 | return; | 343 | return; |
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c new file mode 100644 index 000000000000..a2a45e6c7c8b --- /dev/null +++ b/drivers/md/dm-sysfs.c | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Red Hat, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This file is released under the GPL. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/sysfs.h> | ||
| 8 | #include <linux/dm-ioctl.h> | ||
| 9 | #include "dm.h" | ||
| 10 | |||
| 11 | struct dm_sysfs_attr { | ||
| 12 | struct attribute attr; | ||
| 13 | ssize_t (*show)(struct mapped_device *, char *); | ||
| 14 | ssize_t (*store)(struct mapped_device *, char *); | ||
| 15 | }; | ||
| 16 | |||
| 17 | #define DM_ATTR_RO(_name) \ | ||
| 18 | struct dm_sysfs_attr dm_attr_##_name = \ | ||
| 19 | __ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL) | ||
| 20 | |||
| 21 | static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
| 22 | char *page) | ||
| 23 | { | ||
| 24 | struct dm_sysfs_attr *dm_attr; | ||
| 25 | struct mapped_device *md; | ||
| 26 | ssize_t ret; | ||
| 27 | |||
| 28 | dm_attr = container_of(attr, struct dm_sysfs_attr, attr); | ||
| 29 | if (!dm_attr->show) | ||
| 30 | return -EIO; | ||
| 31 | |||
| 32 | md = dm_get_from_kobject(kobj); | ||
| 33 | if (!md) | ||
| 34 | return -EINVAL; | ||
| 35 | |||
| 36 | ret = dm_attr->show(md, page); | ||
| 37 | dm_put(md); | ||
| 38 | |||
| 39 | return ret; | ||
| 40 | } | ||
| 41 | |||
| 42 | static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) | ||
| 43 | { | ||
| 44 | if (dm_copy_name_and_uuid(md, buf, NULL)) | ||
| 45 | return -EIO; | ||
| 46 | |||
| 47 | strcat(buf, "\n"); | ||
| 48 | return strlen(buf); | ||
| 49 | } | ||
| 50 | |||
| 51 | static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) | ||
| 52 | { | ||
| 53 | if (dm_copy_name_and_uuid(md, NULL, buf)) | ||
| 54 | return -EIO; | ||
| 55 | |||
| 56 | strcat(buf, "\n"); | ||
| 57 | return strlen(buf); | ||
| 58 | } | ||
| 59 | |||
| 60 | static DM_ATTR_RO(name); | ||
| 61 | static DM_ATTR_RO(uuid); | ||
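For reference, `static DM_ATTR_RO(name);` expands, assuming the stock definition of __ATTR in this era, to roughly:

```c
static struct dm_sysfs_attr dm_attr_name = {
	.attr = { .name = "name", .mode = S_IRUGO },
	.show = dm_attr_name_show,
	.store = NULL,
};
```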
| 62 | |||
| 63 | static struct attribute *dm_attrs[] = { | ||
| 64 | &dm_attr_name.attr, | ||
| 65 | &dm_attr_uuid.attr, | ||
| 66 | NULL, | ||
| 67 | }; | ||
| 68 | |||
| 69 | static struct sysfs_ops dm_sysfs_ops = { | ||
| 70 | .show = dm_attr_show, | ||
| 71 | }; | ||
| 72 | |||
| 73 | /* | ||
| 74 | * The dm kobject is embedded in the mapped_device structure, | ||
| 75 | * so there is no need to define a release function here. | ||
| 76 | */ | ||
| 77 | static struct kobj_type dm_ktype = { | ||
| 78 | .sysfs_ops = &dm_sysfs_ops, | ||
| 79 | .default_attrs = dm_attrs, | ||
| 80 | }; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Initialize the kobject. | ||
| 84 | * Because nobody is using md yet, there is no need for explicit dm_get/put. | ||
| 85 | */ | ||
| 86 | int dm_sysfs_init(struct mapped_device *md) | ||
| 87 | { | ||
| 88 | return kobject_init_and_add(dm_kobject(md), &dm_ktype, | ||
| 89 | &disk_to_dev(dm_disk(md))->kobj, | ||
| 90 | "%s", "dm"); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Remove the kobject; called after all references have been removed. | ||
| 95 | */ | ||
| 96 | void dm_sysfs_exit(struct mapped_device *md) | ||
| 97 | { | ||
| 98 | kobject_put(dm_kobject(md)); | ||
| 99 | } | ||
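From userspace the new attributes appear under the device's block directory; the exact path below (dm-0) is an assumption for illustration:

```c
/* Illustrative reader for /sys/block/dm-0/dm/name. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/sys/block/dm-0/dm/name", O_RDONLY);

	if (fd < 0)
		return 1;

	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* already newline-terminated */
	}
	close(fd);
	return 0;
}
```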
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 04e5fd742c2c..2fd66c30f7f8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2001 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001 Sistina Software (UK) Limited. |
| 3 | * Copyright (C) 2004 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
| 6 | */ | 6 | */ |
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 17 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
| 18 | #include <linux/delay.h> | ||
| 18 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
| 19 | 20 | ||
| 20 | #define DM_MSG_PREFIX "table" | 21 | #define DM_MSG_PREFIX "table" |
| @@ -24,6 +25,19 @@ | |||
| 24 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) | 25 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) |
| 25 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) | 26 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) |
| 26 | 27 | ||
| 28 | /* | ||
| 29 | * The table always has exactly one reference from either mapped_device->map | ||
| 30 | * or hash_cell->new_map. This reference is not counted in table->holders. | ||
| 31 | * A pair of dm_table_create/dm_table_destroy functions is used for table | ||
| 32 | * creation/destruction. | ||
| 33 | * | ||
| 34 | * Temporary references from the other code increase table->holders. A pair | ||
| 35 | * of dm_table_get/dm_table_put functions is used to manipulate it. | ||
| 36 | * | ||
| 37 | * When the table is about to be destroyed, we wait for table->holders to | ||
| 38 | * drop to zero. | ||
| 39 | */ | ||
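A sketch of the temporary-reference side of this scheme (illustrative caller, real dm_get_table/dm_table_put):

```c
static void inspect_live_table(struct mapped_device *md)
{
	struct dm_table *map = dm_get_table(md);	/* holders++ */

	if (!map)
		return;

	/* safe to use 'map' here: dm_table_destroy() waits for holders */

	dm_table_put(map);				/* holders-- */
}
```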
| 40 | |||
| 27 | struct dm_table { | 41 | struct dm_table { |
| 28 | struct mapped_device *md; | 42 | struct mapped_device *md; |
| 29 | atomic_t holders; | 43 | atomic_t holders; |
| @@ -38,6 +52,8 @@ struct dm_table { | |||
| 38 | sector_t *highs; | 52 | sector_t *highs; |
| 39 | struct dm_target *targets; | 53 | struct dm_target *targets; |
| 40 | 54 | ||
| 55 | unsigned barriers_supported:1; | ||
| 56 | |||
| 41 | /* | 57 | /* |
| 42 | * Indicates the rw permissions for the new logical | 58 | * Indicates the rw permissions for the new logical |
| 43 | * device. This should be a combination of FMODE_READ | 59 | * device. This should be a combination of FMODE_READ |
| @@ -226,7 +242,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode, | |||
| 226 | return -ENOMEM; | 242 | return -ENOMEM; |
| 227 | 243 | ||
| 228 | INIT_LIST_HEAD(&t->devices); | 244 | INIT_LIST_HEAD(&t->devices); |
| 229 | atomic_set(&t->holders, 1); | 245 | atomic_set(&t->holders, 0); |
| 246 | t->barriers_supported = 1; | ||
| 230 | 247 | ||
| 231 | if (!num_targets) | 248 | if (!num_targets) |
| 232 | num_targets = KEYS_PER_NODE; | 249 | num_targets = KEYS_PER_NODE; |
| @@ -256,10 +273,14 @@ static void free_devices(struct list_head *devices) | |||
| 256 | } | 273 | } |
| 257 | } | 274 | } |
| 258 | 275 | ||
| 259 | static void table_destroy(struct dm_table *t) | 276 | void dm_table_destroy(struct dm_table *t) |
| 260 | { | 277 | { |
| 261 | unsigned int i; | 278 | unsigned int i; |
| 262 | 279 | ||
| 280 | while (atomic_read(&t->holders)) | ||
| 281 | msleep(1); | ||
| 282 | smp_mb(); | ||
| 283 | |||
| 263 | /* free the indexes (see dm_table_complete) */ | 284 | /* free the indexes (see dm_table_complete) */ |
| 264 | if (t->depth >= 2) | 285 | if (t->depth >= 2) |
| 265 | vfree(t->index[t->depth - 2]); | 286 | vfree(t->index[t->depth - 2]); |
| @@ -297,8 +318,8 @@ void dm_table_put(struct dm_table *t) | |||
| 297 | if (!t) | 318 | if (!t) |
| 298 | return; | 319 | return; |
| 299 | 320 | ||
| 300 | if (atomic_dec_and_test(&t->holders)) | 321 | smp_mb__before_atomic_dec(); |
| 301 | table_destroy(t); | 322 | atomic_dec(&t->holders); |
| 302 | } | 323 | } |
| 303 | 324 | ||
| 304 | /* | 325 | /* |
| @@ -728,6 +749,10 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
| 728 | /* FIXME: the plan is to combine high here and then have | 749 | /* FIXME: the plan is to combine high here and then have |
| 729 | * the merge fn apply the target level restrictions. */ | 750 | * the merge fn apply the target level restrictions. */ |
| 730 | combine_restrictions_low(&t->limits, &tgt->limits); | 751 | combine_restrictions_low(&t->limits, &tgt->limits); |
| 752 | |||
| 753 | if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS)) | ||
| 754 | t->barriers_supported = 0; | ||
| 755 | |||
| 731 | return 0; | 756 | return 0; |
| 732 | 757 | ||
| 733 | bad: | 758 | bad: |
| @@ -772,6 +797,12 @@ int dm_table_complete(struct dm_table *t) | |||
| 772 | 797 | ||
| 773 | check_for_valid_limits(&t->limits); | 798 | check_for_valid_limits(&t->limits); |
| 774 | 799 | ||
| 800 | /* | ||
| 801 | * We only support barriers if there is exactly one underlying device. | ||
| 802 | */ | ||
| 803 | if (!list_is_singular(&t->devices)) | ||
| 804 | t->barriers_supported = 0; | ||
| 805 | |||
| 775 | /* how many indexes will the btree have ? */ | 806 | /* how many indexes will the btree have ? */ |
| 776 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | 807 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); |
| 777 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | 808 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); |
| @@ -986,6 +1017,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) | |||
| 986 | return t->md; | 1017 | return t->md; |
| 987 | } | 1018 | } |
| 988 | 1019 | ||
| 1020 | int dm_table_barrier_ok(struct dm_table *t) | ||
| 1021 | { | ||
| 1022 | return t->barriers_supported; | ||
| 1023 | } | ||
| 1024 | EXPORT_SYMBOL(dm_table_barrier_ok); | ||
| 1025 | |||
| 989 | EXPORT_SYMBOL(dm_vcalloc); | 1026 | EXPORT_SYMBOL(dm_vcalloc); |
| 990 | EXPORT_SYMBOL(dm_get_device); | 1027 | EXPORT_SYMBOL(dm_get_device); |
| 991 | EXPORT_SYMBOL(dm_put_device); | 1028 | EXPORT_SYMBOL(dm_put_device); |
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 835cf95b857f..7decf10006e4 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c | |||
| @@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t) | |||
| 130 | return rv; | 130 | return rv; |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | int dm_unregister_target(struct target_type *t) | 133 | void dm_unregister_target(struct target_type *t) |
| 134 | { | 134 | { |
| 135 | struct tt_internal *ti; | 135 | struct tt_internal *ti; |
| 136 | 136 | ||
| 137 | down_write(&_lock); | 137 | down_write(&_lock); |
| 138 | if (!(ti = __find_target_type(t->name))) { | 138 | if (!(ti = __find_target_type(t->name))) { |
| 139 | up_write(&_lock); | 139 | DMCRIT("Unregistering unrecognised target: %s", t->name); |
| 140 | return -EINVAL; | 140 | BUG(); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | if (ti->use) { | 143 | if (ti->use) { |
| 144 | up_write(&_lock); | 144 | DMCRIT("Attempt to unregister target still in use: %s", |
| 145 | return -ETXTBSY; | 145 | t->name); |
| 146 | BUG(); | ||
| 146 | } | 147 | } |
| 147 | 148 | ||
| 148 | list_del(&ti->list); | 149 | list_del(&ti->list); |
| 149 | kfree(ti); | 150 | kfree(ti); |
| 150 | 151 | ||
| 151 | up_write(&_lock); | 152 | up_write(&_lock); |
| 152 | return 0; | ||
| 153 | } | 153 | } |
| 154 | 154 | ||
| 155 | /* | 155 | /* |
| @@ -187,8 +187,7 @@ int __init dm_target_init(void) | |||
| 187 | 187 | ||
| 188 | void dm_target_exit(void) | 188 | void dm_target_exit(void) |
| 189 | { | 189 | { |
| 190 | if (dm_unregister_target(&error_target)) | 190 | dm_unregister_target(&error_target); |
| 191 | DMWARN("error target unregistration failed"); | ||
| 192 | } | 191 | } |
| 193 | 192 | ||
| 194 | EXPORT_SYMBOL(dm_register_target); | 193 | EXPORT_SYMBOL(dm_register_target); |
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cdbf126ec106..bbc97030c0c2 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c | |||
| @@ -69,10 +69,7 @@ static int __init dm_zero_init(void) | |||
| 69 | 69 | ||
| 70 | static void __exit dm_zero_exit(void) | 70 | static void __exit dm_zero_exit(void) |
| 71 | { | 71 | { |
| 72 | int r = dm_unregister_target(&zero_target); | 72 | dm_unregister_target(&zero_target); |
| 73 | |||
| 74 | if (r < 0) | ||
| 75 | DMERR("unregister failed %d", r); | ||
| 76 | } | 73 | } |
| 77 | 74 | ||
| 78 | module_init(dm_zero_init) | 75 | module_init(dm_zero_init) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 421c9f02d8ca..51ba1db4b3e7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. |
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
| 6 | */ | 6 | */ |
| @@ -32,6 +32,7 @@ static unsigned int _major = 0; | |||
| 32 | 32 | ||
| 33 | static DEFINE_SPINLOCK(_minor_lock); | 33 | static DEFINE_SPINLOCK(_minor_lock); |
| 34 | /* | 34 | /* |
| 35 | * For bio-based dm. | ||
| 35 | * One of these is allocated per bio. | 36 | * One of these is allocated per bio. |
| 36 | */ | 37 | */ |
| 37 | struct dm_io { | 38 | struct dm_io { |
| @@ -43,6 +44,7 @@ struct dm_io { | |||
| 43 | }; | 44 | }; |
| 44 | 45 | ||
| 45 | /* | 46 | /* |
| 47 | * For bio-based dm. | ||
| 46 | * One of these is allocated per target within a bio. Hopefully | 48 | * One of these is allocated per target within a bio. Hopefully |
| 47 | * this will be simplified out one day. | 49 | * this will be simplified out one day. |
| 48 | */ | 50 | */ |
| @@ -54,6 +56,27 @@ struct dm_target_io { | |||
| 54 | 56 | ||
| 55 | DEFINE_TRACE(block_bio_complete); | 57 | DEFINE_TRACE(block_bio_complete); |
| 56 | 58 | ||
| 59 | /* | ||
| 60 | * For request-based dm. | ||
| 61 | * One of these is allocated per request. | ||
| 62 | */ | ||
| 63 | struct dm_rq_target_io { | ||
| 64 | struct mapped_device *md; | ||
| 65 | struct dm_target *ti; | ||
| 66 | struct request *orig, clone; | ||
| 67 | int error; | ||
| 68 | union map_info info; | ||
| 69 | }; | ||
| 70 | |||
| 71 | /* | ||
| 72 | * For request-based dm. | ||
| 73 | * One of these is allocated per bio. | ||
| 74 | */ | ||
| 75 | struct dm_rq_clone_bio_info { | ||
| 76 | struct bio *orig; | ||
| 77 | struct request *rq; | ||
| 78 | }; | ||
| 79 | |||
| 57 | union map_info *dm_get_mapinfo(struct bio *bio) | 80 | union map_info *dm_get_mapinfo(struct bio *bio) |
| 58 | { | 81 | { |
| 59 | if (bio && bio->bi_private) | 82 | if (bio && bio->bi_private) |
| @@ -144,11 +167,16 @@ struct mapped_device { | |||
| 144 | 167 | ||
| 145 | /* forced geometry settings */ | 168 | /* forced geometry settings */ |
| 146 | struct hd_geometry geometry; | 169 | struct hd_geometry geometry; |
| 170 | |||
| 171 | /* sysfs handle */ | ||
| 172 | struct kobject kobj; | ||
| 147 | }; | 173 | }; |
| 148 | 174 | ||
| 149 | #define MIN_IOS 256 | 175 | #define MIN_IOS 256 |
| 150 | static struct kmem_cache *_io_cache; | 176 | static struct kmem_cache *_io_cache; |
| 151 | static struct kmem_cache *_tio_cache; | 177 | static struct kmem_cache *_tio_cache; |
| 178 | static struct kmem_cache *_rq_tio_cache; | ||
| 179 | static struct kmem_cache *_rq_bio_info_cache; | ||
| 152 | 180 | ||
| 153 | static int __init local_init(void) | 181 | static int __init local_init(void) |
| 154 | { | 182 | { |
| @@ -164,9 +192,17 @@ static int __init local_init(void) | |||
| 164 | if (!_tio_cache) | 192 | if (!_tio_cache) |
| 165 | goto out_free_io_cache; | 193 | goto out_free_io_cache; |
| 166 | 194 | ||
| 195 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); | ||
| 196 | if (!_rq_tio_cache) | ||
| 197 | goto out_free_tio_cache; | ||
| 198 | |||
| 199 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); | ||
| 200 | if (!_rq_bio_info_cache) | ||
| 201 | goto out_free_rq_tio_cache; | ||
| 202 | |||
| 167 | r = dm_uevent_init(); | 203 | r = dm_uevent_init(); |
| 168 | if (r) | 204 | if (r) |
| 169 | goto out_free_tio_cache; | 205 | goto out_free_rq_bio_info_cache; |
| 170 | 206 | ||
| 171 | _major = major; | 207 | _major = major; |
| 172 | r = register_blkdev(_major, _name); | 208 | r = register_blkdev(_major, _name); |
| @@ -180,6 +216,10 @@ static int __init local_init(void) | |||
| 180 | 216 | ||
| 181 | out_uevent_exit: | 217 | out_uevent_exit: |
| 182 | dm_uevent_exit(); | 218 | dm_uevent_exit(); |
| 219 | out_free_rq_bio_info_cache: | ||
| 220 | kmem_cache_destroy(_rq_bio_info_cache); | ||
| 221 | out_free_rq_tio_cache: | ||
| 222 | kmem_cache_destroy(_rq_tio_cache); | ||
| 183 | out_free_tio_cache: | 223 | out_free_tio_cache: |
| 184 | kmem_cache_destroy(_tio_cache); | 224 | kmem_cache_destroy(_tio_cache); |
| 185 | out_free_io_cache: | 225 | out_free_io_cache: |
| @@ -190,6 +230,8 @@ out_free_io_cache: | |||
| 190 | 230 | ||
| 191 | static void local_exit(void) | 231 | static void local_exit(void) |
| 192 | { | 232 | { |
| 233 | kmem_cache_destroy(_rq_bio_info_cache); | ||
| 234 | kmem_cache_destroy(_rq_tio_cache); | ||
| 193 | kmem_cache_destroy(_tio_cache); | 235 | kmem_cache_destroy(_tio_cache); |
| 194 | kmem_cache_destroy(_io_cache); | 236 | kmem_cache_destroy(_io_cache); |
| 195 | unregister_blkdev(_major, _name); | 237 | unregister_blkdev(_major, _name); |
| @@ -796,7 +838,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) | |||
| 796 | ci.map = dm_get_table(md); | 838 | ci.map = dm_get_table(md); |
| 797 | if (unlikely(!ci.map)) | 839 | if (unlikely(!ci.map)) |
| 798 | return -EIO; | 840 | return -EIO; |
| 799 | 841 | if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { | |
| 842 | dm_table_put(ci.map); | ||
| 843 | bio_endio(bio, -EOPNOTSUPP); | ||
| 844 | return 0; | ||
| 845 | } | ||
| 800 | ci.md = md; | 846 | ci.md = md; |
| 801 | ci.bio = bio; | 847 | ci.bio = bio; |
| 802 | ci.io = alloc_io(md); | 848 | ci.io = alloc_io(md); |
| @@ -880,15 +926,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
| 880 | struct mapped_device *md = q->queuedata; | 926 | struct mapped_device *md = q->queuedata; |
| 881 | int cpu; | 927 | int cpu; |
| 882 | 928 | ||
| 883 | /* | ||
| 884 | * There is no use in forwarding any barrier request since we can't | ||
| 885 | * guarantee it is (or can be) handled by the targets correctly. | ||
| 886 | */ | ||
| 887 | if (unlikely(bio_barrier(bio))) { | ||
| 888 | bio_endio(bio, -EOPNOTSUPP); | ||
| 889 | return 0; | ||
| 890 | } | ||
| 891 | |||
| 892 | down_read(&md->io_lock); | 929 | down_read(&md->io_lock); |
| 893 | 930 | ||
| 894 | cpu = part_stat_lock(); | 931 | cpu = part_stat_lock(); |
| @@ -943,8 +980,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
| 943 | struct mapped_device *md = congested_data; | 980 | struct mapped_device *md = congested_data; |
| 944 | struct dm_table *map; | 981 | struct dm_table *map; |
| 945 | 982 | ||
| 946 | atomic_inc(&md->pending); | ||
| 947 | |||
| 948 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | 983 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { |
| 949 | map = dm_get_table(md); | 984 | map = dm_get_table(md); |
| 950 | if (map) { | 985 | if (map) { |
| @@ -953,10 +988,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
| 953 | } | 988 | } |
| 954 | } | 989 | } |
| 955 | 990 | ||
| 956 | if (!atomic_dec_return(&md->pending)) | ||
| 957 | /* nudge anyone waiting on suspend queue */ | ||
| 958 | wake_up(&md->wait); | ||
| 959 | |||
| 960 | return r; | 991 | return r; |
| 961 | } | 992 | } |
| 962 | 993 | ||
| @@ -1216,10 +1247,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
| 1216 | 1247 | ||
| 1217 | if (md->suspended_bdev) | 1248 | if (md->suspended_bdev) |
| 1218 | __set_size(md, size); | 1249 | __set_size(md, size); |
| 1219 | if (size == 0) | 1250 | |
| 1251 | if (!size) { | ||
| 1252 | dm_table_destroy(t); | ||
| 1220 | return 0; | 1253 | return 0; |
| 1254 | } | ||
| 1221 | 1255 | ||
| 1222 | dm_table_get(t); | ||
| 1223 | dm_table_event_callback(t, event_callback, md); | 1256 | dm_table_event_callback(t, event_callback, md); |
| 1224 | 1257 | ||
| 1225 | write_lock(&md->map_lock); | 1258 | write_lock(&md->map_lock); |
| @@ -1241,7 +1274,7 @@ static void __unbind(struct mapped_device *md) | |||
| 1241 | write_lock(&md->map_lock); | 1274 | write_lock(&md->map_lock); |
| 1242 | md->map = NULL; | 1275 | md->map = NULL; |
| 1243 | write_unlock(&md->map_lock); | 1276 | write_unlock(&md->map_lock); |
| 1244 | dm_table_put(map); | 1277 | dm_table_destroy(map); |
| 1245 | } | 1278 | } |
| 1246 | 1279 | ||
| 1247 | /* | 1280 | /* |
| @@ -1255,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result) | |||
| 1255 | if (!md) | 1288 | if (!md) |
| 1256 | return -ENXIO; | 1289 | return -ENXIO; |
| 1257 | 1290 | ||
| 1291 | dm_sysfs_init(md); | ||
| 1292 | |||
| 1258 | *result = md; | 1293 | *result = md; |
| 1259 | return 0; | 1294 | return 0; |
| 1260 | } | 1295 | } |
| @@ -1330,8 +1365,9 @@ void dm_put(struct mapped_device *md) | |||
| 1330 | dm_table_presuspend_targets(map); | 1365 | dm_table_presuspend_targets(map); |
| 1331 | dm_table_postsuspend_targets(map); | 1366 | dm_table_postsuspend_targets(map); |
| 1332 | } | 1367 | } |
| 1333 | __unbind(md); | 1368 | dm_sysfs_exit(md); |
| 1334 | dm_table_put(map); | 1369 | dm_table_put(map); |
| 1370 | __unbind(md); | ||
| 1335 | free_dev(md); | 1371 | free_dev(md); |
| 1336 | } | 1372 | } |
| 1337 | } | 1373 | } |
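dm_create() now registers a per-device kobject through dm_sysfs_init(), and dm_put() tears it down with dm_sysfs_exit() before the table is unbound, so a sysfs reader can never observe a device whose table has already been freed. A plausible minimal implementation of the pair (dm-sysfs.c is added elsewhere in this series and may differ; dm_ktype is an assumed statically defined kobj_type):

	int dm_sysfs_init(struct mapped_device *md)
	{
		/* Parent the "dm" kobject under the device's gendisk. */
		return kobject_init_and_add(dm_kobject(md), &dm_ktype,
					    &disk_to_dev(dm_disk(md))->kobj,
					    "%s", "dm");
	}

	void dm_sysfs_exit(struct mapped_device *md)
	{
		kobject_put(dm_kobject(md));	/* drops the ref from init */
	}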
| @@ -1669,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
| 1669 | return md->disk; | 1705 | return md->disk; |
| 1670 | } | 1706 | } |
| 1671 | 1707 | ||
| 1708 | struct kobject *dm_kobject(struct mapped_device *md) | ||
| 1709 | { | ||
| 1710 | return &md->kobj; | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | /* | ||
| 1714 | * struct mapped_device should not be exported outside of dm.c, | ||
| 1715 | * so use this check to verify that kobj is part of the md structure. | ||
| 1716 | */ | ||
| 1717 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | ||
| 1718 | { | ||
| 1719 | struct mapped_device *md; | ||
| 1720 | |||
| 1721 | md = container_of(kobj, struct mapped_device, kobj); | ||
| 1722 | if (&md->kobj != kobj) | ||
| 1723 | return NULL; | ||
| 1724 | |||
| 1725 | dm_get(md); | ||
| 1726 | return md; | ||
| 1727 | } | ||
| 1728 | |||
| 1672 | int dm_suspended(struct mapped_device *md) | 1729 | int dm_suspended(struct mapped_device *md) |
| 1673 | { | 1730 | { |
| 1674 | return test_bit(DMF_SUSPENDED, &md->flags); | 1731 | return test_bit(DMF_SUSPENDED, &md->flags); |
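dm_get_from_kobject() pairs with dm_kobject(): the containment check above guards against a kobject that is not embedded in a mapped_device, and a successful lookup returns the device with a reference held. A typical consumer is a sysfs show handler; the handler and attribute names below are illustrative, not necessarily upstream's:

	static ssize_t dm_name_show(struct kobject *kobj, struct attribute *attr,
				    char *page)
	{
		struct mapped_device *md = dm_get_from_kobject(kobj);
		ssize_t len;

		if (!md)	/* kobj did not belong to a mapped_device */
			return -EINVAL;

		len = sprintf(page, "%s\n", dm_device_name(md));
		dm_put(md);	/* release the reference taken above */

		return len;
	}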
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 0ade60cdef42..20194e000c5a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
| @@ -36,6 +36,7 @@ struct dm_table; | |||
| 36 | /*----------------------------------------------------------------- | 36 | /*----------------------------------------------------------------- |
| 37 | * Internal table functions. | 37 | * Internal table functions. |
| 38 | *---------------------------------------------------------------*/ | 38 | *---------------------------------------------------------------*/ |
| 39 | void dm_table_destroy(struct dm_table *t); | ||
| 39 | void dm_table_event_callback(struct dm_table *t, | 40 | void dm_table_event_callback(struct dm_table *t, |
| 40 | void (*fn)(void *), void *context); | 41 | void (*fn)(void *), void *context); |
| 41 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); | 42 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); |
| @@ -51,6 +52,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits); | |||
| 51 | * To check the return value from dm_table_find_target(). | 52 | * To check the return value from dm_table_find_target(). |
| 52 | */ | 53 | */ |
| 53 | #define dm_target_is_valid(t) ((t)->table) | 54 | #define dm_target_is_valid(t) ((t)->table) |
| 55 | int dm_table_barrier_ok(struct dm_table *t); | ||
| 54 | 56 | ||
| 55 | /*----------------------------------------------------------------- | 57 | /*----------------------------------------------------------------- |
| 56 | * A registry of target types. | 58 | * A registry of target types. |
| @@ -72,6 +74,14 @@ int dm_interface_init(void); | |||
| 72 | void dm_interface_exit(void); | 74 | void dm_interface_exit(void); |
| 73 | 75 | ||
| 74 | /* | 76 | /* |
| 77 | * sysfs interface | ||
| 78 | */ | ||
| 79 | int dm_sysfs_init(struct mapped_device *md); | ||
| 80 | void dm_sysfs_exit(struct mapped_device *md); | ||
| 81 | struct kobject *dm_kobject(struct mapped_device *md); | ||
| 82 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); | ||
| 83 | |||
| 84 | /* | ||
| 75 | * Targets for linear and striped mappings | 85 | * Targets for linear and striped mappings |
| 76 | */ | 86 | */ |
| 77 | int dm_linear_init(void); | 87 | int dm_linear_init(void); |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index c17fd334e574..8209e08969f9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
| @@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); | |||
| 45 | */ | 45 | */ |
| 46 | typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, | 46 | typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, |
| 47 | union map_info *map_context); | 47 | union map_info *map_context); |
| 48 | typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, | ||
| 49 | union map_info *map_context); | ||
| 48 | 50 | ||
| 49 | /* | 51 | /* |
| 50 | * Returns: | 52 | * Returns: |
| @@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, | |||
| 57 | typedef int (*dm_endio_fn) (struct dm_target *ti, | 59 | typedef int (*dm_endio_fn) (struct dm_target *ti, |
| 58 | struct bio *bio, int error, | 60 | struct bio *bio, int error, |
| 59 | union map_info *map_context); | 61 | union map_info *map_context); |
| 62 | typedef int (*dm_request_endio_fn) (struct dm_target *ti, | ||
| 63 | struct request *clone, int error, | ||
| 64 | union map_info *map_context); | ||
| 60 | 65 | ||
| 61 | typedef void (*dm_flush_fn) (struct dm_target *ti); | 66 | typedef void (*dm_flush_fn) (struct dm_target *ti); |
| 62 | typedef void (*dm_presuspend_fn) (struct dm_target *ti); | 67 | typedef void (*dm_presuspend_fn) (struct dm_target *ti); |
| @@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, | |||
| 75 | typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, | 80 | typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, |
| 76 | struct bio_vec *biovec, int max_size); | 81 | struct bio_vec *biovec, int max_size); |
| 77 | 82 | ||
| 83 | /* | ||
| 84 | * Returns: | ||
| 85 | * 0: The target can handle the next I/O immediately. | ||
| 86 | * 1: The target can't handle the next I/O immediately. | ||
| 87 | */ | ||
| 88 | typedef int (*dm_busy_fn) (struct dm_target *ti); | ||
| 89 | |||
| 78 | void dm_error(const char *message); | 90 | void dm_error(const char *message); |
| 79 | 91 | ||
| 80 | /* | 92 | /* |
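The busy hook gives request-based targets back-pressure: returning 1 tells device-mapper the target cannot take the next request yet, so it is left on the queue and retried later. A minimal illustrative implementation (example_ctx and its fields are hypothetical):

	/* Report congestion from private, target-specific state. */
	static int example_busy(struct dm_target *ti)
	{
		struct example_ctx *ctx = ti->private;	/* hypothetical */

		/* 1: can't take the next I/O now; 0: go ahead. */
		return ctx->in_flight >= ctx->queue_depth;
	}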
| @@ -100,14 +112,23 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d); | |||
| 100 | /* | 112 | /* |
| 101 | * Information about a target type | 113 | * Information about a target type |
| 102 | */ | 114 | */ |
| 115 | |||
| 116 | /* | ||
| 117 | * Target features | ||
| 118 | */ | ||
| 119 | #define DM_TARGET_SUPPORTS_BARRIERS 0x00000001 | ||
| 120 | |||
| 103 | struct target_type { | 121 | struct target_type { |
| 122 | uint64_t features; | ||
| 104 | const char *name; | 123 | const char *name; |
| 105 | struct module *module; | 124 | struct module *module; |
| 106 | unsigned version[3]; | 125 | unsigned version[3]; |
| 107 | dm_ctr_fn ctr; | 126 | dm_ctr_fn ctr; |
| 108 | dm_dtr_fn dtr; | 127 | dm_dtr_fn dtr; |
| 109 | dm_map_fn map; | 128 | dm_map_fn map; |
| 129 | dm_map_request_fn map_rq; | ||
| 110 | dm_endio_fn end_io; | 130 | dm_endio_fn end_io; |
| 131 | dm_request_endio_fn rq_end_io; | ||
| 111 | dm_flush_fn flush; | 132 | dm_flush_fn flush; |
| 112 | dm_presuspend_fn presuspend; | 133 | dm_presuspend_fn presuspend; |
| 113 | dm_postsuspend_fn postsuspend; | 134 | dm_postsuspend_fn postsuspend; |
| @@ -117,6 +138,7 @@ struct target_type { | |||
| 117 | dm_message_fn message; | 138 | dm_message_fn message; |
| 118 | dm_ioctl_fn ioctl; | 139 | dm_ioctl_fn ioctl; |
| 119 | dm_merge_fn merge; | 140 | dm_merge_fn merge; |
| 141 | dm_busy_fn busy; | ||
| 120 | }; | 142 | }; |
| 121 | 143 | ||
| 122 | struct io_restrictions { | 144 | struct io_restrictions { |
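Pulling the new target_type fields together: a bio-based target that can honour barriers sets DM_TARGET_SUPPORTS_BARRIERS in ->features, while a request-based target fills in map_rq, rq_end_io and busy in place of map and end_io. A hypothetical registration (all example_* names are invented):

	static struct target_type example_target = {
		.features = DM_TARGET_SUPPORTS_BARRIERS, /* bio-based, barrier-safe */
		.name	  = "example",
		.version  = {1, 0, 0},
		.module   = THIS_MODULE,
		.ctr	  = example_ctr,
		.dtr	  = example_dtr,
		.map	  = example_map,
		/* a request-based target would instead set .map_rq,
		 * .rq_end_io and .busy */
	};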
| @@ -157,8 +179,7 @@ struct dm_target { | |||
| 157 | }; | 179 | }; |
| 158 | 180 | ||
| 159 | int dm_register_target(struct target_type *t); | 181 | int dm_register_target(struct target_type *t); |
| 160 | int dm_unregister_target(struct target_type *t); | 182 | void dm_unregister_target(struct target_type *t); |
| 161 | |||
| 162 | 183 | ||
| 163 | /*----------------------------------------------------------------- | 184 | /*----------------------------------------------------------------- |
| 164 | * Functions for creating and manipulating mapped devices. | 185 | * Functions for creating and manipulating mapped devices. |
| @@ -276,6 +297,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); | |||
| 276 | *---------------------------------------------------------------*/ | 297 | *---------------------------------------------------------------*/ |
| 277 | #define DM_NAME "device-mapper" | 298 | #define DM_NAME "device-mapper" |
| 278 | 299 | ||
| 300 | #define DMCRIT(f, arg...) \ | ||
| 301 | printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) | ||
| 302 | |||
| 279 | #define DMERR(f, arg...) \ | 303 | #define DMERR(f, arg...) \ |
| 280 | printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) | 304 | printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) |
| 281 | #define DMERR_LIMIT(f, arg...) \ | 305 | #define DMERR_LIMIT(f, arg...) \ |
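DMCRIT completes the severity ladder above DMERR/DMWARN/DMINFO and, like them, expects the including file to define DM_MSG_PREFIX first. A usage sketch (the prefix and message are hypothetical):

	#define DM_MSG_PREFIX "example"

	DMCRIT("metadata corruption detected, failing device");
	/* emits: KERN_CRIT "device-mapper: example: metadata corruption
	 * detected, failing device" */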
