Diffstat (limited to 'drivers/md')
31 files changed, 1702 insertions(+), 1222 deletions(-)
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 1c615804ea76..72880b7e28d9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,9 +3,10 @@
 #
 
 dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+		    dm-snap-persistent.o
 dm-mirror-objs := dm-raid1.o
 md-mod-objs := md.o bitmap.o
 raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ab7c8e4a61f9..719943763391 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
 	/* choose a good rdev and read the page from there */
 
 	mdk_rdev_t *rdev;
-	struct list_head *tmp;
 	sector_t target;
 
 	if (!page)
@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	rdev_for_each(rdev, tmp, mddev) {
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		if (! test_bit(In_sync, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
@@ -964,9 +963,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 				 */
 				page = bitmap->sb_page;
 				offset = sizeof(bitmap_super_t);
-				read_sb_page(bitmap->mddev, bitmap->offset,
-					     page,
-					     index, count);
+				if (!file)
+					read_sb_page(bitmap->mddev,
+						     bitmap->offset,
+						     page,
+						     index, count);
 			} else if (file) {
 				page = read_page(file, index, bitmap, count);
 				offset = 0;
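
The bitmap.c change drops the spare struct list_head cursor that rdev_for_each() required and walks the disks list directly. A minimal sketch of the resulting pattern (illustrative only; same_set is the list_head member that links an rdev into mddev->disks):

	mdk_rdev_t *rdev;

	/* iterate every rdev on the array without an extra cursor */
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (!test_bit(In_sync, &rdev->flags) ||
		    test_bit(Faulty, &rdev->flags))
			continue;	/* skip out-of-sync or failed devices */
		/* ... read the superblock page from this rdev ... */
	}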
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ce26c84af064..35bda49796fb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_page_pool;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
+	cc->bs = bioset_create(MIN_IOS, 0);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad_bs;
@@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
 
 static void __exit dm_crypt_exit(void)
 {
-	int r = dm_unregister_target(&crypt_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 848b381f1173..59ee1b015d2d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -364,11 +364,7 @@ bad_queue:
 
 static void __exit dm_delay_exit(void)
 {
-	int r = dm_unregister_target(&delay_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&delay_target);
 	kmem_cache_destroy(delayed_cache);
 	destroy_workqueue(kdelayd_wq);
 }
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 01590f3e0009..dccbfb0e010f 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,756 +1,45 @@
 /*
- * dm-exception-store.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- * Copyright (C) 2006 Red Hat GmbH
+ * Copyright (C) 2006-2008 Red Hat GmbH
  *
  * This file is released under the GPL.
  */
 
-#include "dm-snap.h"
+#include "dm-exception-store.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
-
-#define DM_MSG_PREFIX "snapshots"
-#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
-
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
-
-/*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device.  The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store.  It makes sense therefore, to store the
- * metadata in chunk size blocks.
- *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable.  It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
- * constructor.
- *
- * The first chunk of the COW device just contains the header.
- * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
- *
- * All on disk structures are in little-endian format.  The end
- * of the exceptions info is indicated by an exception with a
- * new_chunk of 0, which is invalid since it would point to the
- * header chunk.
- */
-
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata.
- */
-#define SNAPSHOT_DISK_VERSION 1
-
-struct disk_header {
-	uint32_t magic;
-
-	/*
-	 * Is this snapshot valid.  There is no way of recovering
-	 * an invalid snapshot.
-	 */
-	uint32_t valid;
-
-	/*
-	 * Simple, incrementing version. no backward
-	 * compatibility.
-	 */
-	uint32_t version;
-
-	/* In sectors */
-	uint32_t chunk_size;
-};
-
-struct disk_exception {
-	uint64_t old_chunk;
-	uint64_t new_chunk;
-};
-
-struct commit_callback {
-	void (*callback)(void *, int success);
-	void *context;
-};
-
-/*
- * The top level structure for a persistent exception store.
- */
-struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
-	int version;
-	int valid;
-	uint32_t exceptions_per_area;
-
-	/*
-	 * Now that we have an asynchronous kcopyd there is no
-	 * need for large chunk sizes, so it wont hurt to have a
-	 * whole chunks worth of metadata in memory at once.
-	 */
-	void *area;
-
-	/*
-	 * An area of zeros used to clear the next area.
-	 */
-	void *zero_area;
-
-	/*
-	 * Used to keep track of which metadata area the data in
-	 * 'chunk' refers to.
-	 */
-	chunk_t current_area;
-
-	/*
-	 * The next free chunk for an exception.
-	 */
-	chunk_t next_free;
-
-	/*
-	 * The index of next free exception in the current
-	 * metadata area.
-	 */
-	uint32_t current_committed;
-
-	atomic_t pending_count;
-	uint32_t callback_count;
-	struct commit_callback *callbacks;
-	struct dm_io_client *io_client;
-
-	struct workqueue_struct *metadata_wq;
-};
-
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
-static int alloc_area(struct pstore *ps)
-{
-	int r = -ENOMEM;
-	size_t len;
-
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
-
-	/*
-	 * Allocate the chunk_size block of memory that will hold
-	 * a single metadata area.
-	 */
-	ps->area = vmalloc(len);
-	if (!ps->area)
-		return r;
-
-	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
-	memset(ps->zero_area, 0, len);
-
-	return 0;
-}
-
-static void free_area(struct pstore *ps)
-{
-	vfree(ps->area);
-	ps->area = NULL;
-	vfree(ps->zero_area);
-	ps->zero_area = NULL;
-}
-
-struct mdata_req {
-	struct dm_io_region *where;
-	struct dm_io_request *io_req;
-	struct work_struct work;
-	int result;
-};
-
-static void do_metadata(struct work_struct *work)
-{
-	struct mdata_req *req = container_of(work, struct mdata_req, work);
-
-	req->result = dm_io(req->io_req, 1, req->where, NULL);
-}
-
-/*
- * Read or write a chunk aligned and sized block of data from a device.
- */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = rw,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-	struct mdata_req req;
-
-	if (!metadata)
-		return dm_io(&io_req, 1, &where, NULL);
-
-	req.where = &where;
-	req.io_req = &io_req;
-
-	/*
-	 * Issue the synchronous I/O from a different thread
-	 * to avoid generic_make_request recursion.
-	 */
-	INIT_WORK(&req.work, do_metadata);
-	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
-
-	return req.result;
-}
-
-/*
- * Convert a metadata area index to a chunk index.
- */
-static chunk_t area_location(struct pstore *ps, chunk_t area)
-{
-	return 1 + ((ps->exceptions_per_area + 1) * area);
-}
-
-/*
- * Read or write a metadata area.  Remembering to skip the first
- * chunk which holds the header.
- */
-static int area_io(struct pstore *ps, int rw)
-{
-	int r;
-	chunk_t chunk;
-
-	chunk = area_location(ps, ps->current_area);
-
-	r = chunk_io(ps, chunk, rw, 0);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-static void zero_memory_area(struct pstore *ps)
-{
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-}
-
-static int zero_disk_area(struct pstore *ps, chunk_t area)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
-}
-
-static int read_header(struct pstore *ps, int *new_snapshot)
-{
-	int r;
-	struct disk_header *dh;
-	chunk_t chunk_size;
-	int chunk_size_supplied = 1;
-
-	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
-	 */
-	if (!ps->snap->chunk_size) {
-		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
-		chunk_size_supplied = 0;
-	}
-
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
-							     chunk_size));
-	if (IS_ERR(ps->io_client))
-		return PTR_ERR(ps->io_client);
-
-	r = alloc_area(ps);
-	if (r)
-		return r;
-
-	r = chunk_io(ps, 0, READ, 1);
-	if (r)
-		goto bad;
-
-	dh = (struct disk_header *) ps->area;
-
-	if (le32_to_cpu(dh->magic) == 0) {
-		*new_snapshot = 1;
-		return 0;
-	}
-
-	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
-		DMWARN("Invalid or corrupt snapshot");
-		r = -ENXIO;
-		goto bad;
-	}
-
-	*new_snapshot = 0;
-	ps->valid = le32_to_cpu(dh->valid);
-	ps->version = le32_to_cpu(dh->version);
-	chunk_size = le32_to_cpu(dh->chunk_size);
-
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
-		return 0;
-
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
-
-	/* We had a bogus chunk_size. Fix stuff up. */
-	free_area(ps);
-
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
-
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
-	r = alloc_area(ps);
-	return r;
-
-bad:
-	free_area(ps);
-	return r;
-}
-
-static int write_header(struct pstore *ps)
-{
-	struct disk_header *dh;
-
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-
-	dh = (struct disk_header *) ps->area;
-	dh->magic = cpu_to_le32(SNAP_MAGIC);
-	dh->valid = cpu_to_le32(ps->valid);
-	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
-
-	return chunk_io(ps, 0, WRITE, 1);
-}
-
-/*
- * Access functions for the disk exceptions, these do the endian conversions.
- */
-static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
-{
-	BUG_ON(index >= ps->exceptions_per_area);
-
-	return ((struct disk_exception *) ps->area) + index;
-}
 
-static void read_exception(struct pstore *ps,
-			   uint32_t index, struct disk_exception *result)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	result->old_chunk = le64_to_cpu(e->old_chunk);
-	result->new_chunk = le64_to_cpu(e->new_chunk);
-}
-
-static void write_exception(struct pstore *ps,
-			    uint32_t index, struct disk_exception *de)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	e->old_chunk = cpu_to_le64(de->old_chunk);
-	e->new_chunk = cpu_to_le64(de->new_chunk);
-}
+#define DM_MSG_PREFIX "snapshot exception stores"
 
-/*
- * Registers the exceptions that are present in the current area.
- * 'full' is filled in to indicate if the area has been
- * filled.
- */
-static int insert_exceptions(struct pstore *ps, int *full)
+int dm_exception_store_init(void)
 {
 	int r;
-	unsigned int i;
-	struct disk_exception de;
-
-	/* presume the area is full */
-	*full = 1;
-
-	for (i = 0; i < ps->exceptions_per_area; i++) {
-		read_exception(ps, i, &de);
-
-		/*
-		 * If the new_chunk is pointing at the start of
-		 * the COW device, where the first metadata area
-		 * is we know that we've hit the end of the
-		 * exceptions.  Therefore the area is not full.
-		 */
-		if (de.new_chunk == 0LL) {
-			ps->current_committed = i;
-			*full = 0;
-			break;
-		}
-
-		/*
-		 * Keep track of the start of the free chunks.
-		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
-
-		/*
-		 * Otherwise we add the exception to the snapshot.
-		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-static int read_exceptions(struct pstore *ps)
-{
-	int r, full = 1;
-
-	/*
-	 * Keeping reading chunks and inserting exceptions until
-	 * we find a partially full area.
-	 */
-	for (ps->current_area = 0; full; ps->current_area++) {
-		r = area_io(ps, READ);
-		if (r)
-			return r;
 
-		r = insert_exceptions(ps, &full);
-		if (r)
-			return r;
+	r = dm_transient_snapshot_init();
+	if (r) {
+		DMERR("Unable to register transient exception store type.");
+		goto transient_fail;
 	}
 
-	ps->current_area--;
-
-	return 0;
-}
-
-static struct pstore *get_info(struct exception_store *store)
-{
-	return (struct pstore *) store->context;
-}
-
-static void persistent_fraction_full(struct exception_store *store,
-				     sector_t *numerator, sector_t *denominator)
-{
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
-
-static void persistent_destroy(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
-	free_area(ps);
-	kfree(ps);
-}
-
-static int persistent_read_metadata(struct exception_store *store)
-{
-	int r, uninitialized_var(new_snapshot);
-	struct pstore *ps = get_info(store);
-
-	/*
-	 * Read the snapshot header.
-	 */
-	r = read_header(ps, &new_snapshot);
-	if (r)
-		return r;
-
-	/*
-	 * Now we know correct chunk_size, complete the initialisation.
-	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
-				  sizeof(struct disk_exception);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-				   sizeof(*ps->callbacks));
-	if (!ps->callbacks)
-		return -ENOMEM;
-
-	/*
-	 * Do we need to setup a new snapshot ?
-	 */
-	if (new_snapshot) {
-		r = write_header(ps);
-		if (r) {
-			DMWARN("write_header failed");
-			return r;
-		}
-
-		ps->current_area = 0;
-		zero_memory_area(ps);
-		r = zero_disk_area(ps, 0);
-		if (r) {
-			DMWARN("zero_disk_area(0) failed");
-			return r;
-		}
-	} else {
-		/*
-		 * Sanity checks.
-		 */
-		if (ps->version != SNAPSHOT_DISK_VERSION) {
-			DMWARN("unable to handle snapshot disk version %d",
-			       ps->version);
-			return -EINVAL;
-		}
-
-		/*
-		 * Metadata are valid, but snapshot is invalidated
-		 */
-		if (!ps->valid)
-			return 1;
-
-		/*
-		 * Read the metadata.
-		 */
-		r = read_exceptions(ps);
-		if (r)
-			return r;
+	r = dm_persistent_snapshot_init();
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		goto persistent_fail;
 	}
 
 	return 0;
-}
-
-static int persistent_prepare(struct exception_store *store,
-			      struct dm_snap_exception *e)
-{
-	struct pstore *ps = get_info(store);
-	uint32_t stride;
-	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
-		return -ENOSPC;
 
-	e->new_chunk = ps->next_free;
-
-	/*
-	 * Move onto the next free pending, making sure to take
-	 * into account the location of the metadata chunks.
-	 */
-	stride = (ps->exceptions_per_area + 1);
-	next_free = ++ps->next_free;
-	if (sector_div(next_free, stride) == 1)
-		ps->next_free++;
-
-	atomic_inc(&ps->pending_count);
-	return 0;
-}
-
-static void persistent_commit(struct exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
-{
-	unsigned int i;
-	struct pstore *ps = get_info(store);
-	struct disk_exception de;
-	struct commit_callback *cb;
-
-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
-
-	/*
-	 * Add the callback to the back of the array.  This code
-	 * is the only place where the callback array is
-	 * manipulated, and we know that it will never be called
-	 * multiple times concurrently.
-	 */
-	cb = ps->callbacks + ps->callback_count++;
-	cb->callback = callback;
-	cb->context = callback_context;
-
-	/*
-	 * If there are exceptions in flight and we have not yet
-	 * filled this metadata area there's nothing more to do.
-	 */
-	if (!atomic_dec_and_test(&ps->pending_count) &&
-	    (ps->current_committed != ps->exceptions_per_area))
-		return;
-
-	/*
-	 * If we completely filled the current area, then wipe the next one.
-	 */
-	if ((ps->current_committed == ps->exceptions_per_area) &&
-	    zero_disk_area(ps, ps->current_area + 1))
-		ps->valid = 0;
-
-	/*
-	 * Commit exceptions to disk.
-	 */
-	if (ps->valid && area_io(ps, WRITE))
-		ps->valid = 0;
-
-	/*
-	 * Advance to the next area if this one is full.
-	 */
-	if (ps->current_committed == ps->exceptions_per_area) {
-		ps->current_committed = 0;
-		ps->current_area++;
-		zero_memory_area(ps);
-	}
-
-	for (i = 0; i < ps->callback_count; i++) {
-		cb = ps->callbacks + i;
-		cb->callback(cb->context, ps->valid);
-	}
-
-	ps->callback_count = 0;
-}
-
-static void persistent_drop(struct exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	ps->valid = 0;
-	if (write_header(ps))
-		DMWARN("write header failed");
-}
-
-int dm_create_persistent(struct exception_store *store)
-{
-	struct pstore *ps;
-
-	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		return -ENOMEM;
-
-	ps->snap = store->snap;
-	ps->valid = 1;
-	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->area = NULL;
-	ps->next_free = 2;	/* skipping the header and first area */
-	ps->current_committed = 0;
-
-	ps->callback_count = 0;
-	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = NULL;
-
-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
-	if (!ps->metadata_wq) {
-		kfree(ps);
-		DMERR("couldn't start header metadata update thread");
-		return -ENOMEM;
-	}
-
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
-	store->fraction_full = persistent_fraction_full;
-	store->context = ps;
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the store for non-persistent snapshots.
- *---------------------------------------------------------------*/
-struct transient_c {
-	sector_t next_free;
-};
-
-static void transient_destroy(struct exception_store *store)
-{
-	kfree(store->context);
-}
-
-static int transient_read_metadata(struct exception_store *store)
-{
-	return 0;
-}
-
-static int transient_prepare(struct exception_store *store,
-			     struct dm_snap_exception *e)
-{
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	if (size < (tc->next_free + store->snap->chunk_size))
-		return -1;
-
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
-
-	return 0;
-}
-
-static void transient_commit(struct exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
-{
-	/* Just succeed */
-	callback(callback_context, 1);
-}
-
-static void transient_fraction_full(struct exception_store *store,
-				    sector_t *numerator, sector_t *denominator)
-{
-	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
+persistent_fail:
+	dm_persistent_snapshot_exit();
+transient_fail:
+	return r;
+}
 
-int dm_create_transient(struct exception_store *store)
+void dm_exception_store_exit(void)
 {
-	struct transient_c *tc;
-
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
-	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->next_free = 0;
-	store->context = tc;
-
-	return 0;
+	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
 }
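
The layout described in the comment block removed above (and re-added in dm-snap-persistent.c below) puts the header in chunk 0, then alternates one metadata chunk with exceptions_per_area data chunks. A sketch of that arithmetic, mirroring area_location() (metadata_chunk is a hypothetical helper name, not part of the patch):

	/*
	 * Chunk 0 holds the header; each area is one metadata chunk
	 * followed by exceptions_per_area data chunks, so area 'a'
	 * keeps its metadata at chunk 1 + (exceptions_per_area + 1) * a.
	 */
	static chunk_t metadata_chunk(uint32_t exceptions_per_area, chunk_t area)
	{
		return 1 + (chunk_t)(exceptions_per_area + 1) * area;
	}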
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
new file mode 100644
index 000000000000..bb9f33d5daa2
--- /dev/null
+++ b/drivers/md/dm-exception-store.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-mapper snapshot exception store.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_EXCEPTION_STORE
+#define _LINUX_DM_EXCEPTION_STORE
+
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 32k - 512k.
+ */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
+ * of chunks that follow contiguously.  Remaining bits hold the number of the
+ * chunk within the device.
+ */
+struct dm_snap_exception {
+	struct list_head hash_list;
+
+	chunk_t old_chunk;
+	chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct dm_exception_store {
+	/*
+	 * Destroys this object when you've finished with it.
+	 */
+	void (*destroy) (struct dm_exception_store *store);
+
+	/*
+	 * The target shouldn't read the COW device until this is
+	 * called.  As exceptions are read from the COW, they are
+	 * reported back via the callback.
+	 */
+	int (*read_metadata) (struct dm_exception_store *store,
+			      int (*callback)(void *callback_context,
+					      chunk_t old, chunk_t new),
+			      void *callback_context);
+
+	/*
+	 * Find somewhere to store the next exception.
+	 */
+	int (*prepare_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e);
+
+	/*
+	 * Update the metadata with this exception.
+	 */
+	void (*commit_exception) (struct dm_exception_store *store,
+				  struct dm_snap_exception *e,
+				  void (*callback) (void *, int success),
+				  void *callback_context);
+
+	/*
+	 * The snapshot is invalid, note this in the metadata.
+	 */
+	void (*drop_snapshot) (struct dm_exception_store *store);
+
+	int (*status) (struct dm_exception_store *store, status_type_t status,
+		       char *result, unsigned int maxlen);
+
+	/*
+	 * Return how full the snapshot is.
+	 */
+	void (*fraction_full) (struct dm_exception_store *store,
+			       sector_t *numerator,
+			       sector_t *denominator);
+
+	struct dm_snapshot *snap;
+	void *context;
+};
+
+/*
+ * Funtions to manipulate consecutive chunks
+ */
+# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#  define DM_CHUNK_CONSECUTIVE_BITS 8
+#  define DM_CHUNK_NUMBER_BITS 56
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+	e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
+
+	BUG_ON(!dm_consecutive_chunk_count(e));
+}
+
+# else
+#  define DM_CHUNK_CONSECUTIVE_BITS 0
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk;
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return 0;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+}
+
+# endif
+
+int dm_exception_store_init(void);
+void dm_exception_store_exit(void);
+
+/*
+ * Two exception store implementations.
+ */
+int dm_persistent_snapshot_init(void);
+void dm_persistent_snapshot_exit(void);
+
+int dm_transient_snapshot_init(void);
+void dm_transient_snapshot_exit(void);
+
+int dm_create_persistent(struct dm_exception_store *store);
+
+int dm_create_transient(struct dm_exception_store *store);
+
+#endif /* _LINUX_DM_EXCEPTION_STORE */
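
The consecutive-chunk helpers above pack a run length into the top 8 bits of new_chunk. A worked example (illustrative fragment, 64-bit case with DM_CHUNK_NUMBER_BITS = 56):

	/* new_chunk = (3ULL << 56) | 100: first chunk 100, 3 more follow */
	struct dm_snap_exception e = { .new_chunk = (3ULL << 56) | 100 };

	BUG_ON(dm_chunk_number(e.new_chunk) != 100);	/* low 56 bits */
	BUG_ON(dm_consecutive_chunk_count(&e) != 3);	/* top 8 bits */

	dm_consecutive_chunk_count_inc(&e);	/* run now covers one more chunk */
	BUG_ON(dm_consecutive_chunk_count(&e) != 4);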
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2fd6d4450637..a34338567a2a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(16, 16);
+	client->bios = bioset_create(16, 0);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 777c948180f9..54d0588fc1f6 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	dm_put(hc->md);
 	free_cell(hc);
 }
@@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
 
 		r = dm_swap_table(md, new_map);
 		if (r) {
+			dm_table_destroy(new_map);
 			dm_put(md);
-			dm_table_put(new_map);
 			return r;
 		}
 
@@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
 			set_disk_ro(dm_disk(md), 0);
 		else
 			set_disk_ro(dm_disk(md), 1);
-
-		dm_table_put(new_map);
 	}
 
 	if (dm_suspended(md))
@@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
@@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map) {
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 		hc->new_map = NULL;
 	}
 
@@ -1550,8 +1548,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 		goto out;
 	}
 
-	strcpy(name, hc->name);
-	strcpy(uuid, hc->uuid ? : "");
+	if (name)
+		strcpy(name, hc->name);
+	if (uuid)
+		strcpy(uuid, hc->uuid ? : "");
 
 out:
 	up_read(&_hash_lock);
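
With the NULL checks added to dm_copy_name_and_uuid(), a caller that wants only one of the two strings can pass NULL for the other. A hypothetical caller sketch (assuming the usual 0-on-success return convention):

	char name[DM_NAME_LEN];

	if (!dm_copy_name_and_uuid(md, name, NULL))	/* uuid not wanted */
		DMINFO("device name: %s", name);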
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 44042becad8a..bfa107f59d96 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,6 +142,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl = linear_ioctl,
 	.merge = linear_merge,
+	.features = DM_TARGET_SUPPORTS_BARRIERS,
 };
 
 int __init dm_linear_init(void)
@@ -156,8 +157,5 @@ int __init dm_linear_init(void)
 
 void dm_linear_exit(void)
 {
-	int r = dm_unregister_target(&linear_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&linear_target);
 }
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a8c0fc79ca78..737961f275c1 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
 static int rw_header(struct log_c *lc, int rw)
 {
 	lc->io_req.bi_rw = rw;
-	lc->io_req.mem.ptr.vma = lc->disk_header;
-	lc->io_req.notify.fn = NULL;
 
 	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -362,10 +360,15 @@ static int read_header(struct log_c *log)
 	return 0;
 }
 
-static inline int write_header(struct log_c *log)
+static int _check_region_size(struct dm_target *ti, uint32_t region_size)
 {
-	header_to_disk(&log->header, log->disk_header);
-	return rw_header(log, WRITE);
+	if (region_size < 2 || region_size > ti->len)
+		return 0;
+
+	if (!is_power_of_2(region_size))
+		return 0;
+
+	return 1;
 }
 
 /*----------------------------------------------------------------
@@ -403,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		}
 	}
 
-	if (sscanf(argv[0], "%u", &region_size) != 1) {
-		DMWARN("invalid region size string");
+	if (sscanf(argv[0], "%u", &region_size) != 1 ||
+	    !_check_region_size(ti, region_size)) {
+		DMWARN("invalid region size %s", argv[0]);
 		return -EINVAL;
 	}
 
@@ -453,8 +457,18 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
 				       bitset_size, ti->limits.hardsect_size);
+
+		if (buf_size > dev->bdev->bd_inode->i_size) {
+			DMWARN("log device %s too small: need %llu bytes",
+			       dev->name, (unsigned long long)buf_size);
+			kfree(lc);
+			return -EINVAL;
+		}
+
 		lc->header_location.count = buf_size >> SECTOR_SHIFT;
+
 		lc->io_req.mem.type = DM_IO_VMA;
+		lc->io_req.notify.fn = NULL;
 		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
 								   PAGE_SIZE));
 		if (IS_ERR(lc->io_req.client)) {
@@ -467,10 +481,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
 			DMWARN("couldn't allocate disk log buffer");
+			dm_io_client_destroy(lc->io_req.client);
 			kfree(lc);
 			return -ENOMEM;
 		}
 
+		lc->io_req.mem.ptr.vma = lc->disk_header;
 		lc->clean_bits = (void *)lc->disk_header +
 				 (LOG_OFFSET << SECTOR_SHIFT);
 	}
@@ -482,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		DMWARN("couldn't allocate sync bitset");
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -495,6 +513,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		vfree(lc->sync_bits);
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -631,8 +651,10 @@ static int disk_resume(struct dm_dirty_log *log)
 	/* set the correct number of regions in the header */
 	lc->header.nr_regions = lc->region_count;
 
+	header_to_disk(&lc->header, lc->disk_header);
+
 	/* write the new header */
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r) {
 		DMWARN("%s: Failed to write header on dirty region log device",
 		       lc->log_dev->name);
@@ -682,7 +704,7 @@ static int disk_flush(struct dm_dirty_log *log)
 	if (!lc->touched)
 		return 0;
 
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r)
 		fail_log_device(lc);
 	else
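
The new i_size check above refuses a log device smaller than the header-plus-bitset buffer that rw_header() transfers. A worked example under assumed values (LOG_OFFSET = 2 sectors, 512-byte hardsect_size): with 65536 regions the clean bitset needs 65536 / 8 = 8192 bytes, so buf_size = dm_round_up((2 << 9) + 8192, 512) = 9216 bytes, and create_log_context() now fails with -EINVAL if the log device cannot hold 9216 bytes.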
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3d7f4923cd13..095f77bf9681 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
 		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
@@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
 		       pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 }
 
 /*
@@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 	}
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	return 0;
 }
 
@@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
 
 static void __exit dm_multipath_exit(void)
 {
-	int r;
-
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("target unregister failed %d", r);
+	dm_unregister_target(&multipath_target);
 	kmem_cache_destroy(_mpio_cache);
 }
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ec43f9fa4b2a..4d6bc101962e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	struct mirror_set *ms = m->ms;
 	struct mirror *new;
 
-	if (!errors_handled(ms))
-		return;
-
 	/*
 	 * error_count is used for nothing more than a
 	 * simple way to tell if a device has encountered
@@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	if (test_and_set_bit(error_type, &m->error_type))
 		return;
 
+	if (!errors_handled(ms))
+		return;
+
 	if (m != get_default_mirror(ms))
 		goto out;
 
@@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 	kfree(ms);
 }
 
-static inline int _check_region_size(struct dm_target *ti, uint32_t size)
-{
-	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
-		 size > ti->len);
-}
-
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		      unsigned int mirror, char **argv)
 {
@@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
 		return NULL;
 	}
 
-	if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
-		ti->error = "Invalid region size";
-		dm_dirty_log_destroy(dl);
-		return NULL;
-	}
-
 	return dl;
 }
 
@@ -1300,11 +1288,7 @@ static int __init dm_mirror_init(void)
 
 static void __exit dm_mirror_exit(void)
 {
-	int r;
-
-	r = dm_unregister_target(&mirror_target);
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&mirror_target);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c new file mode 100644 index 000000000000..936b34e0959f --- /dev/null +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include "dm-exception-store.h" | ||
9 | #include "dm-snap.h" | ||
10 | |||
11 | #include <linux/mm.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/vmalloc.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/dm-io.h> | ||
16 | |||
17 | #define DM_MSG_PREFIX "persistent snapshot" | ||
18 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ | ||
19 | |||
20 | /*----------------------------------------------------------------- | ||
21 | * Persistent snapshots, by persistent we mean that the snapshot | ||
22 | * will survive a reboot. | ||
23 | *---------------------------------------------------------------*/ | ||
24 | |||
25 | /* | ||
26 | * We need to store a record of which parts of the origin have | ||
27 | * been copied to the snapshot device. The snapshot code | ||
28 | * requires that we copy exception chunks to chunk aligned areas | ||
29 | * of the COW store. It makes sense therefore, to store the | ||
30 | * metadata in chunk size blocks. | ||
31 | * | ||
32 | * There is no backward or forward compatibility implemented, | ||
33 | * snapshots with different disk versions than the kernel will | ||
34 | * not be usable. It is expected that "lvcreate" will blank out | ||
35 | * the start of a fresh COW device before calling the snapshot | ||
36 | * constructor. | ||
37 | * | ||
38 | * The first chunk of the COW device just contains the header. | ||
39 | * After this there is a chunk filled with exception metadata, | ||
40 | * followed by as many exception chunks as can fit in the | ||
41 | * metadata areas. | ||
42 | * | ||
43 | * All on disk structures are in little-endian format. The end | ||
44 | * of the exceptions info is indicated by an exception with a | ||
45 | * new_chunk of 0, which is invalid since it would point to the | ||
46 | * header chunk. | ||
47 | */ | ||
48 | |||
49 | /* | ||
50 | * Magic for persistent snapshots: "SnAp" - Feeble isn't it. | ||
51 | */ | ||
52 | #define SNAP_MAGIC 0x70416e53 | ||
53 | |||
54 | /* | ||
55 | * The on-disk version of the metadata. | ||
56 | */ | ||
57 | #define SNAPSHOT_DISK_VERSION 1 | ||
58 | |||
59 | struct disk_header { | ||
60 | uint32_t magic; | ||
61 | |||
62 | /* | ||
63 | * Is this snapshot valid. There is no way of recovering | ||
64 | * an invalid snapshot. | ||
65 | */ | ||
66 | uint32_t valid; | ||
67 | |||
68 | /* | ||
69 | * Simple, incrementing version. no backward | ||
70 | * compatibility. | ||
71 | */ | ||
72 | uint32_t version; | ||
73 | |||
74 | /* In sectors */ | ||
75 | uint32_t chunk_size; | ||
76 | }; | ||
77 | |||
78 | struct disk_exception { | ||
79 | uint64_t old_chunk; | ||
80 | uint64_t new_chunk; | ||
81 | }; | ||
82 | |||
83 | struct commit_callback { | ||
84 | void (*callback)(void *, int success); | ||
85 | void *context; | ||
86 | }; | ||
87 | |||
88 | /* | ||
89 | * The top level structure for a persistent exception store. | ||
90 | */ | ||
91 | struct pstore { | ||
92 | struct dm_snapshot *snap; /* up pointer to my snapshot */ | ||
93 | int version; | ||
94 | int valid; | ||
95 | uint32_t exceptions_per_area; | ||
96 | |||
97 | /* | ||
98 | * Now that we have an asynchronous kcopyd there is no | ||
99 | * need for large chunk sizes, so it wont hurt to have a | ||
100 | * whole chunks worth of metadata in memory at once. | ||
101 | */ | ||
102 | void *area; | ||
103 | |||
104 | /* | ||
105 | * An area of zeros used to clear the next area. | ||
106 | */ | ||
107 | void *zero_area; | ||
108 | |||
109 | /* | ||
110 | * Used to keep track of which metadata area the data in | ||
111 | * 'chunk' refers to. | ||
112 | */ | ||
113 | chunk_t current_area; | ||
114 | |||
115 | /* | ||
116 | * The next free chunk for an exception. | ||
117 | */ | ||
118 | chunk_t next_free; | ||
119 | |||
120 | /* | ||
121 | * The index of next free exception in the current | ||
122 | * metadata area. | ||
123 | */ | ||
124 | uint32_t current_committed; | ||
125 | |||
126 | atomic_t pending_count; | ||
127 | uint32_t callback_count; | ||
128 | struct commit_callback *callbacks; | ||
129 | struct dm_io_client *io_client; | ||
130 | |||
131 | struct workqueue_struct *metadata_wq; | ||
132 | }; | ||
133 | |||
134 | static unsigned sectors_to_pages(unsigned sectors) | ||
135 | { | ||
136 | return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); | ||
137 | } | ||
138 | |||
139 | static int alloc_area(struct pstore *ps) | ||
140 | { | ||
141 | int r = -ENOMEM; | ||
142 | size_t len; | ||
143 | |||
144 | len = ps->snap->chunk_size << SECTOR_SHIFT; | ||
145 | |||
146 | /* | ||
147 | * Allocate the chunk_size block of memory that will hold | ||
148 | * a single metadata area. | ||
149 | */ | ||
150 | ps->area = vmalloc(len); | ||
151 | if (!ps->area) | ||
152 | return r; | ||
153 | |||
154 | ps->zero_area = vmalloc(len); | ||
155 | if (!ps->zero_area) { | ||
156 | vfree(ps->area); | ||
157 | return r; | ||
158 | } | ||
159 | memset(ps->zero_area, 0, len); | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static void free_area(struct pstore *ps) | ||
165 | { | ||
166 | vfree(ps->area); | ||
167 | ps->area = NULL; | ||
168 | vfree(ps->zero_area); | ||
169 | ps->zero_area = NULL; | ||
170 | } | ||
171 | |||
172 | struct mdata_req { | ||
173 | struct dm_io_region *where; | ||
174 | struct dm_io_request *io_req; | ||
175 | struct work_struct work; | ||
176 | int result; | ||
177 | }; | ||
178 | |||
179 | static void do_metadata(struct work_struct *work) | ||
180 | { | ||
181 | struct mdata_req *req = container_of(work, struct mdata_req, work); | ||
182 | |||
183 | req->result = dm_io(req->io_req, 1, req->where, NULL); | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Read or write a chunk aligned and sized block of data from a device. | ||
188 | */ | ||
189 | static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) | ||
190 | { | ||
191 | struct dm_io_region where = { | ||
192 | .bdev = ps->snap->cow->bdev, | ||
193 | .sector = ps->snap->chunk_size * chunk, | ||
194 | .count = ps->snap->chunk_size, | ||
195 | }; | ||
196 | struct dm_io_request io_req = { | ||
197 | .bi_rw = rw, | ||
198 | .mem.type = DM_IO_VMA, | ||
199 | .mem.ptr.vma = ps->area, | ||
200 | .client = ps->io_client, | ||
201 | .notify.fn = NULL, | ||
202 | }; | ||
203 | struct mdata_req req; | ||
204 | |||
205 | if (!metadata) | ||
206 | return dm_io(&io_req, 1, &where, NULL); | ||
207 | |||
208 | req.where = &where; | ||
209 | req.io_req = &io_req; | ||
210 | |||
211 | /* | ||
212 | * Issue the synchronous I/O from a different thread | ||
213 | * to avoid generic_make_request recursion. | ||
214 | */ | ||
215 | INIT_WORK(&req.work, do_metadata); | ||
216 | queue_work(ps->metadata_wq, &req.work); | ||
217 | flush_workqueue(ps->metadata_wq); | ||
218 | |||
219 | return req.result; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Convert a metadata area index to a chunk index. | ||
224 | */ | ||
225 | static chunk_t area_location(struct pstore *ps, chunk_t area) | ||
226 | { | ||
227 | return 1 + ((ps->exceptions_per_area + 1) * area); | ||
228 | } | ||
229 | |||
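The layout this implies: chunk 0 holds the header, and each area occupies one metadata chunk followed by exceptions_per_area data chunks, so metadata chunks land at 1, exceptions_per_area + 2, and so on. A small userspace sketch (the value 256 is an assumption, e.g. a 4096-byte chunk divided by the 16-byte disk_exception):

#include <stdio.h>

typedef unsigned long long chunk_t;

/* mirror of area_location() above */
static chunk_t area_location(chunk_t exceptions_per_area, chunk_t area)
{
	return 1 + ((exceptions_per_area + 1) * area);
}

int main(void)
{
	chunk_t epa = 256;	/* assumed: 4096-byte chunk / 16-byte entry */
	chunk_t area;

	/* header is chunk 0; metadata areas sit at chunks 1, 258, 515, ... */
	for (area = 0; area < 3; area++)
		printf("area %llu -> chunk %llu\n", area,
		       area_location(epa, area));
	return 0;
}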
230 | /* | ||
231 | * Read or write a metadata area, remembering to skip the first | ||
232 | * chunk, which holds the header. | ||
233 | */ | ||
234 | static int area_io(struct pstore *ps, int rw) | ||
235 | { | ||
236 | int r; | ||
237 | chunk_t chunk; | ||
238 | |||
239 | chunk = area_location(ps, ps->current_area); | ||
240 | |||
241 | r = chunk_io(ps, chunk, rw, 0); | ||
242 | if (r) | ||
243 | return r; | ||
244 | |||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | static void zero_memory_area(struct pstore *ps) | ||
249 | { | ||
250 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
251 | } | ||
252 | |||
253 | static int zero_disk_area(struct pstore *ps, chunk_t area) | ||
254 | { | ||
255 | struct dm_io_region where = { | ||
256 | .bdev = ps->snap->cow->bdev, | ||
257 | .sector = ps->snap->chunk_size * area_location(ps, area), | ||
258 | .count = ps->snap->chunk_size, | ||
259 | }; | ||
260 | struct dm_io_request io_req = { | ||
261 | .bi_rw = WRITE, | ||
262 | .mem.type = DM_IO_VMA, | ||
263 | .mem.ptr.vma = ps->zero_area, | ||
264 | .client = ps->io_client, | ||
265 | .notify.fn = NULL, | ||
266 | }; | ||
267 | |||
268 | return dm_io(&io_req, 1, &where, NULL); | ||
269 | } | ||
270 | |||
271 | static int read_header(struct pstore *ps, int *new_snapshot) | ||
272 | { | ||
273 | int r; | ||
274 | struct disk_header *dh; | ||
275 | chunk_t chunk_size; | ||
276 | int chunk_size_supplied = 1; | ||
277 | |||
278 | /* | ||
279 | * Use default chunk size (or hardsect_size, if larger) if none supplied | ||
280 | */ | ||
281 | if (!ps->snap->chunk_size) { | ||
282 | ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, | ||
283 | bdev_hardsect_size(ps->snap->cow->bdev) >> 9); | ||
284 | ps->snap->chunk_mask = ps->snap->chunk_size - 1; | ||
285 | ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; | ||
286 | chunk_size_supplied = 0; | ||
287 | } | ||
288 | |||
289 | ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> | ||
290 | chunk_size)); | ||
291 | if (IS_ERR(ps->io_client)) | ||
292 | return PTR_ERR(ps->io_client); | ||
293 | |||
294 | r = alloc_area(ps); | ||
295 | if (r) | ||
296 | return r; | ||
297 | |||
298 | r = chunk_io(ps, 0, READ, 1); | ||
299 | if (r) | ||
300 | goto bad; | ||
301 | |||
302 | dh = (struct disk_header *) ps->area; | ||
303 | |||
304 | if (le32_to_cpu(dh->magic) == 0) { | ||
305 | *new_snapshot = 1; | ||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { | ||
310 | DMWARN("Invalid or corrupt snapshot"); | ||
311 | r = -ENXIO; | ||
312 | goto bad; | ||
313 | } | ||
314 | |||
315 | *new_snapshot = 0; | ||
316 | ps->valid = le32_to_cpu(dh->valid); | ||
317 | ps->version = le32_to_cpu(dh->version); | ||
318 | chunk_size = le32_to_cpu(dh->chunk_size); | ||
319 | |||
320 | if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) | ||
321 | return 0; | ||
322 | |||
323 | DMWARN("chunk size %llu in device metadata overrides " | ||
324 | "table chunk size of %llu.", | ||
325 | (unsigned long long)chunk_size, | ||
326 | (unsigned long long)ps->snap->chunk_size); | ||
327 | |||
328 | /* We had a bogus chunk_size. Fix stuff up. */ | ||
329 | free_area(ps); | ||
330 | |||
331 | ps->snap->chunk_size = chunk_size; | ||
332 | ps->snap->chunk_mask = chunk_size - 1; | ||
333 | ps->snap->chunk_shift = ffs(chunk_size) - 1; | ||
334 | |||
335 | r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), | ||
336 | ps->io_client); | ||
337 | if (r) | ||
338 | return r; | ||
339 | |||
340 | r = alloc_area(ps); | ||
341 | return r; | ||
342 | |||
343 | bad: | ||
344 | free_area(ps); | ||
345 | return r; | ||
346 | } | ||
347 | |||
348 | static int write_header(struct pstore *ps) | ||
349 | { | ||
350 | struct disk_header *dh; | ||
351 | |||
352 | memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); | ||
353 | |||
354 | dh = (struct disk_header *) ps->area; | ||
355 | dh->magic = cpu_to_le32(SNAP_MAGIC); | ||
356 | dh->valid = cpu_to_le32(ps->valid); | ||
357 | dh->version = cpu_to_le32(ps->version); | ||
358 | dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); | ||
359 | |||
360 | return chunk_io(ps, 0, WRITE, 1); | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * Access functions for the disk exceptions; these do the endian conversions. | ||
365 | */ | ||
366 | static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | ||
367 | { | ||
368 | BUG_ON(index >= ps->exceptions_per_area); | ||
369 | |||
370 | return ((struct disk_exception *) ps->area) + index; | ||
371 | } | ||
372 | |||
373 | static void read_exception(struct pstore *ps, | ||
374 | uint32_t index, struct disk_exception *result) | ||
375 | { | ||
376 | struct disk_exception *e = get_exception(ps, index); | ||
377 | |||
378 | /* copy it */ | ||
379 | result->old_chunk = le64_to_cpu(e->old_chunk); | ||
380 | result->new_chunk = le64_to_cpu(e->new_chunk); | ||
381 | } | ||
382 | |||
383 | static void write_exception(struct pstore *ps, | ||
384 | uint32_t index, struct disk_exception *de) | ||
385 | { | ||
386 | struct disk_exception *e = get_exception(ps, index); | ||
387 | |||
388 | /* copy it */ | ||
389 | e->old_chunk = cpu_to_le64(de->old_chunk); | ||
390 | e->new_chunk = cpu_to_le64(de->new_chunk); | ||
391 | } | ||
392 | |||
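The on-disk records are always little-endian, whatever the host byte order; a byte-wise userspace stand-in for the cpu_to_le64()/le64_to_cpu() pair used above:

#include <stdio.h>
#include <stdint.h>

static void put_le64(uint8_t *p, uint64_t v)
{
	int i;

	for (i = 0; i < 8; i++)
		p[i] = (uint8_t)(v >> (8 * i));
}

static uint64_t get_le64(const uint8_t *p)
{
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v |= (uint64_t)p[i] << (8 * i);
	return v;
}

int main(void)
{
	uint8_t rec[16];	/* one 16-byte disk_exception */

	put_le64(rec, 42);	/* old_chunk */
	put_le64(rec + 8, 99);	/* new_chunk */
	printf("old=%llu new=%llu\n",
	       (unsigned long long)get_le64(rec),
	       (unsigned long long)get_le64(rec + 8));
	return 0;
}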
393 | /* | ||
394 | * Registers the exceptions that are present in the current area. | ||
395 | * 'full' is filled in to indicate if the area has been | ||
396 | * filled. | ||
397 | */ | ||
398 | static int insert_exceptions(struct pstore *ps, | ||
399 | int (*callback)(void *callback_context, | ||
400 | chunk_t old, chunk_t new), | ||
401 | void *callback_context, | ||
402 | int *full) | ||
403 | { | ||
404 | int r; | ||
405 | unsigned int i; | ||
406 | struct disk_exception de; | ||
407 | |||
408 | /* presume the area is full */ | ||
409 | *full = 1; | ||
410 | |||
411 | for (i = 0; i < ps->exceptions_per_area; i++) { | ||
412 | read_exception(ps, i, &de); | ||
413 | |||
414 | /* | ||
415 | * If the new_chunk is pointing at the start of | ||
416 | * the COW device, where the first metadata area | ||
417 | * is, we know that we've hit the end of the | ||
418 | * exceptions. Therefore the area is not full. | ||
419 | */ | ||
420 | if (de.new_chunk == 0LL) { | ||
421 | ps->current_committed = i; | ||
422 | *full = 0; | ||
423 | break; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Keep track of the start of the free chunks. | ||
428 | */ | ||
429 | if (ps->next_free <= de.new_chunk) | ||
430 | ps->next_free = de.new_chunk + 1; | ||
431 | |||
432 | /* | ||
433 | * Otherwise we add the exception to the snapshot. | ||
434 | */ | ||
435 | r = callback(callback_context, de.old_chunk, de.new_chunk); | ||
436 | if (r) | ||
437 | return r; | ||
438 | } | ||
439 | |||
440 | return 0; | ||
441 | } | ||
442 | |||
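The termination test works because areas are zeroed before use and chunk 0 is the header, so a committed exception can never have new_chunk == 0; the first zero entry therefore marks the end of the committed records. A toy scan with made-up chunk numbers:

#include <stdio.h>
#include <stdint.h>

struct disk_exception { uint64_t old_chunk, new_chunk; };

int main(void)
{
	/* three committed records, then the zeroed remainder of the area */
	struct disk_exception area[5] = {
		{ 10, 2 }, { 11, 3 }, { 12, 4 },
	};
	unsigned i, committed = 0;
	int full = 1;

	for (i = 0; i < 5; i++)
		if (area[i].new_chunk == 0) {
			committed = i;
			full = 0;
			break;
		}
	printf("committed=%u full=%d\n", committed, full);
	return 0;
}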
443 | static int read_exceptions(struct pstore *ps, | ||
444 | int (*callback)(void *callback_context, chunk_t old, | ||
445 | chunk_t new), | ||
446 | void *callback_context) | ||
447 | { | ||
448 | int r, full = 1; | ||
449 | |||
450 | /* | ||
451 | * Keep reading chunks and inserting exceptions until | ||
452 | * we find a partially full area. | ||
453 | */ | ||
454 | for (ps->current_area = 0; full; ps->current_area++) { | ||
455 | r = area_io(ps, READ); | ||
456 | if (r) | ||
457 | return r; | ||
458 | |||
459 | r = insert_exceptions(ps, callback, callback_context, &full); | ||
460 | if (r) | ||
461 | return r; | ||
462 | } | ||
463 | |||
464 | ps->current_area--; | ||
465 | |||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | static struct pstore *get_info(struct dm_exception_store *store) | ||
470 | { | ||
471 | return (struct pstore *) store->context; | ||
472 | } | ||
473 | |||
474 | static void persistent_fraction_full(struct dm_exception_store *store, | ||
475 | sector_t *numerator, sector_t *denominator) | ||
476 | { | ||
477 | *numerator = get_info(store)->next_free * store->snap->chunk_size; | ||
478 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
479 | } | ||
480 | |||
481 | static void persistent_destroy(struct dm_exception_store *store) | ||
482 | { | ||
483 | struct pstore *ps = get_info(store); | ||
484 | |||
485 | destroy_workqueue(ps->metadata_wq); | ||
486 | dm_io_client_destroy(ps->io_client); | ||
487 | vfree(ps->callbacks); | ||
488 | free_area(ps); | ||
489 | kfree(ps); | ||
490 | } | ||
491 | |||
492 | static int persistent_read_metadata(struct dm_exception_store *store, | ||
493 | int (*callback)(void *callback_context, | ||
494 | chunk_t old, chunk_t new), | ||
495 | void *callback_context) | ||
496 | { | ||
497 | int r, uninitialized_var(new_snapshot); | ||
498 | struct pstore *ps = get_info(store); | ||
499 | |||
500 | /* | ||
501 | * Read the snapshot header. | ||
502 | */ | ||
503 | r = read_header(ps, &new_snapshot); | ||
504 | if (r) | ||
505 | return r; | ||
506 | |||
507 | /* | ||
508 | * Now that we know the correct chunk_size, complete the initialisation. | ||
509 | */ | ||
510 | ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / | ||
511 | sizeof(struct disk_exception); | ||
512 | ps->callbacks = dm_vcalloc(ps->exceptions_per_area, | ||
513 | sizeof(*ps->callbacks)); | ||
514 | if (!ps->callbacks) | ||
515 | return -ENOMEM; | ||
516 | |||
517 | /* | ||
518 | * Do we need to set up a new snapshot? | ||
519 | */ | ||
520 | if (new_snapshot) { | ||
521 | r = write_header(ps); | ||
522 | if (r) { | ||
523 | DMWARN("write_header failed"); | ||
524 | return r; | ||
525 | } | ||
526 | |||
527 | ps->current_area = 0; | ||
528 | zero_memory_area(ps); | ||
529 | r = zero_disk_area(ps, 0); | ||
530 | if (r) { | ||
531 | DMWARN("zero_disk_area(0) failed"); | ||
532 | return r; | ||
533 | } | ||
534 | } else { | ||
535 | /* | ||
536 | * Sanity checks. | ||
537 | */ | ||
538 | if (ps->version != SNAPSHOT_DISK_VERSION) { | ||
539 | DMWARN("unable to handle snapshot disk version %d", | ||
540 | ps->version); | ||
541 | return -EINVAL; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * The metadata is valid, but the snapshot is invalidated | ||
546 | */ | ||
547 | if (!ps->valid) | ||
548 | return 1; | ||
549 | |||
550 | /* | ||
551 | * Read the metadata. | ||
552 | */ | ||
553 | r = read_exceptions(ps, callback, callback_context); | ||
554 | if (r) | ||
555 | return r; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static int persistent_prepare_exception(struct dm_exception_store *store, | ||
562 | struct dm_snap_exception *e) | ||
563 | { | ||
564 | struct pstore *ps = get_info(store); | ||
565 | uint32_t stride; | ||
566 | chunk_t next_free; | ||
567 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
568 | |||
569 | /* Is there enough room? */ | ||
570 | if (size < ((ps->next_free + 1) * store->snap->chunk_size)) | ||
571 | return -ENOSPC; | ||
572 | |||
573 | e->new_chunk = ps->next_free; | ||
574 | |||
575 | /* | ||
576 | * Move on to the next free chunk, making sure to take | ||
577 | * into account the location of the metadata chunks. | ||
578 | */ | ||
579 | stride = (ps->exceptions_per_area + 1); | ||
580 | next_free = ++ps->next_free; | ||
581 | if (sector_div(next_free, stride) == 1) | ||
582 | ps->next_free++; | ||
583 | |||
584 | atomic_inc(&ps->pending_count); | ||
585 | return 0; | ||
586 | } | ||
587 | |||
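With stride = exceptions_per_area + 1, every chunk whose remainder modulo the stride is 1 is a metadata chunk (matching area_location() above), so the allocator steps over it. A toy run using plain % in place of sector_div(), with an assumed four exceptions per area:

#include <stdio.h>

int main(void)
{
	unsigned long long epa = 4, stride = epa + 1;
	unsigned long long next_free = 2;	/* header and area 0 skipped */
	int i;

	/* metadata chunks sit at 1, 6, 11, ...; step over them */
	for (i = 0; i < 10; i++) {
		printf("exception %d -> chunk %llu\n", i, next_free);
		if (++next_free % stride == 1)
			next_free++;
	}
	return 0;
}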
588 | static void persistent_commit_exception(struct dm_exception_store *store, | ||
589 | struct dm_snap_exception *e, | ||
590 | void (*callback) (void *, int success), | ||
591 | void *callback_context) | ||
592 | { | ||
593 | unsigned int i; | ||
594 | struct pstore *ps = get_info(store); | ||
595 | struct disk_exception de; | ||
596 | struct commit_callback *cb; | ||
597 | |||
598 | de.old_chunk = e->old_chunk; | ||
599 | de.new_chunk = e->new_chunk; | ||
600 | write_exception(ps, ps->current_committed++, &de); | ||
601 | |||
602 | /* | ||
603 | * Add the callback to the back of the array. This code | ||
604 | * is the only place where the callback array is | ||
605 | * manipulated, and we know that it will never be called | ||
606 | * multiple times concurrently. | ||
607 | */ | ||
608 | cb = ps->callbacks + ps->callback_count++; | ||
609 | cb->callback = callback; | ||
610 | cb->context = callback_context; | ||
611 | |||
612 | /* | ||
613 | * If there are exceptions in flight and we have not yet | ||
614 | * filled this metadata area, there's nothing more to do. | ||
615 | */ | ||
616 | if (!atomic_dec_and_test(&ps->pending_count) && | ||
617 | (ps->current_committed != ps->exceptions_per_area)) | ||
618 | return; | ||
619 | |||
620 | /* | ||
621 | * If we completely filled the current area, then wipe the next one. | ||
622 | */ | ||
623 | if ((ps->current_committed == ps->exceptions_per_area) && | ||
624 | zero_disk_area(ps, ps->current_area + 1)) | ||
625 | ps->valid = 0; | ||
626 | |||
627 | /* | ||
628 | * Commit exceptions to disk. | ||
629 | */ | ||
630 | if (ps->valid && area_io(ps, WRITE)) | ||
631 | ps->valid = 0; | ||
632 | |||
633 | /* | ||
634 | * Advance to the next area if this one is full. | ||
635 | */ | ||
636 | if (ps->current_committed == ps->exceptions_per_area) { | ||
637 | ps->current_committed = 0; | ||
638 | ps->current_area++; | ||
639 | zero_memory_area(ps); | ||
640 | } | ||
641 | |||
642 | for (i = 0; i < ps->callback_count; i++) { | ||
643 | cb = ps->callbacks + i; | ||
644 | cb->callback(cb->context, ps->valid); | ||
645 | } | ||
646 | |||
647 | ps->callback_count = 0; | ||
648 | } | ||
649 | |||
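Restating the flush decision above as a predicate: commits accumulate in the in-memory area and are only written out (and their callbacks run) once the last in-flight exception completes or the area fills. A toy check:

#include <stdio.h>

/* toy model of the condition in persistent_commit_exception() */
static int should_flush(int pending_after_dec, unsigned committed,
			unsigned per_area)
{
	return pending_after_dec == 0 || committed == per_area;
}

int main(void)
{
	unsigned per_area = 4;

	printf("%d\n", should_flush(2, 1, per_area));	/* 0: keep batching */
	printf("%d\n", should_flush(0, 1, per_area));	/* 1: last in flight */
	printf("%d\n", should_flush(3, 4, per_area));	/* 1: area is full */
	return 0;
}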
650 | static void persistent_drop_snapshot(struct dm_exception_store *store) | ||
651 | { | ||
652 | struct pstore *ps = get_info(store); | ||
653 | |||
654 | ps->valid = 0; | ||
655 | if (write_header(ps)) | ||
656 | DMWARN("write header failed"); | ||
657 | } | ||
658 | |||
659 | int dm_create_persistent(struct dm_exception_store *store) | ||
660 | { | ||
661 | struct pstore *ps; | ||
662 | |||
663 | /* allocate the pstore */ | ||
664 | ps = kmalloc(sizeof(*ps), GFP_KERNEL); | ||
665 | if (!ps) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | ps->snap = store->snap; | ||
669 | ps->valid = 1; | ||
670 | ps->version = SNAPSHOT_DISK_VERSION; | ||
671 | ps->area = NULL; | ||
672 | ps->next_free = 2; /* skipping the header and first area */ | ||
673 | ps->current_committed = 0; | ||
674 | |||
675 | ps->callback_count = 0; | ||
676 | atomic_set(&ps->pending_count, 0); | ||
677 | ps->callbacks = NULL; | ||
678 | |||
679 | ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); | ||
680 | if (!ps->metadata_wq) { | ||
681 | kfree(ps); | ||
682 | DMERR("couldn't start header metadata update thread"); | ||
683 | return -ENOMEM; | ||
684 | } | ||
685 | |||
686 | store->destroy = persistent_destroy; | ||
687 | store->read_metadata = persistent_read_metadata; | ||
688 | store->prepare_exception = persistent_prepare_exception; | ||
689 | store->commit_exception = persistent_commit_exception; | ||
690 | store->drop_snapshot = persistent_drop_snapshot; | ||
691 | store->fraction_full = persistent_fraction_full; | ||
692 | store->context = ps; | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | int dm_persistent_snapshot_init(void) | ||
698 | { | ||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | void dm_persistent_snapshot_exit(void) | ||
703 | { | ||
704 | } | ||
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c new file mode 100644 index 000000000000..7f6e2e6dcb0d --- /dev/null +++ b/drivers/md/dm-snap-transient.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | ||
3 | * Copyright (C) 2006-2008 Red Hat GmbH | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include "dm-exception-store.h" | ||
9 | #include "dm-snap.h" | ||
10 | |||
11 | #include <linux/mm.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/vmalloc.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/dm-io.h> | ||
16 | |||
17 | #define DM_MSG_PREFIX "transient snapshot" | ||
18 | |||
19 | /*----------------------------------------------------------------- | ||
20 | * Implementation of the store for non-persistent snapshots. | ||
21 | *---------------------------------------------------------------*/ | ||
22 | struct transient_c { | ||
23 | sector_t next_free; | ||
24 | }; | ||
25 | |||
26 | static void transient_destroy(struct dm_exception_store *store) | ||
27 | { | ||
28 | kfree(store->context); | ||
29 | } | ||
30 | |||
31 | static int transient_read_metadata(struct dm_exception_store *store, | ||
32 | int (*callback)(void *callback_context, | ||
33 | chunk_t old, chunk_t new), | ||
34 | void *callback_context) | ||
35 | { | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int transient_prepare_exception(struct dm_exception_store *store, | ||
40 | struct dm_snap_exception *e) | ||
41 | { | ||
42 | struct transient_c *tc = (struct transient_c *) store->context; | ||
43 | sector_t size = get_dev_size(store->snap->cow->bdev); | ||
44 | |||
45 | if (size < (tc->next_free + store->snap->chunk_size)) | ||
46 | return -1; | ||
47 | |||
48 | e->new_chunk = sector_to_chunk(store->snap, tc->next_free); | ||
49 | tc->next_free += store->snap->chunk_size; | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static void transient_commit_exception(struct dm_exception_store *store, | ||
55 | struct dm_snap_exception *e, | ||
56 | void (*callback) (void *, int success), | ||
57 | void *callback_context) | ||
58 | { | ||
59 | /* Just succeed */ | ||
60 | callback(callback_context, 1); | ||
61 | } | ||
62 | |||
63 | static void transient_fraction_full(struct dm_exception_store *store, | ||
64 | sector_t *numerator, sector_t *denominator) | ||
65 | { | ||
66 | *numerator = ((struct transient_c *) store->context)->next_free; | ||
67 | *denominator = get_dev_size(store->snap->cow->bdev); | ||
68 | } | ||
69 | |||
70 | int dm_create_transient(struct dm_exception_store *store) | ||
71 | { | ||
72 | struct transient_c *tc; | ||
73 | |||
74 | store->destroy = transient_destroy; | ||
75 | store->read_metadata = transient_read_metadata; | ||
76 | store->prepare_exception = transient_prepare_exception; | ||
77 | store->commit_exception = transient_commit_exception; | ||
78 | store->drop_snapshot = NULL; | ||
79 | store->fraction_full = transient_fraction_full; | ||
80 | |||
81 | tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); | ||
82 | if (!tc) | ||
83 | return -ENOMEM; | ||
84 | |||
85 | tc->next_free = 0; | ||
86 | store->context = tc; | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
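Both stores are wired up the same way: the constructor fills in a table of function pointers and stashes its private state in store->context. A minimal userspace sketch of the pattern (names are illustrative, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

struct store {
	void (*destroy)(struct store *s);
	int  (*prepare)(struct store *s);
	void *context;
};

static void toy_destroy(struct store *s) { free(s->context); }
static int  toy_prepare(struct store *s) { return 0; }

static int toy_create(struct store *s)
{
	s->destroy = toy_destroy;
	s->prepare = toy_prepare;
	s->context = calloc(1, 16);
	return s->context ? 0 : -1;
}

int main(void)
{
	struct store s;

	if (toy_create(&s))
		return 1;
	printf("prepare -> %d\n", s.prepare(&s));
	s.destroy(&s);
	return 0;
}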
91 | int dm_transient_snapshot_init(void) | ||
92 | { | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | void dm_transient_snapshot_exit(void) | ||
97 | { | ||
98 | } | ||
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c96db26b87c..65ff82ff124e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
10 | #include <linux/ctype.h> | 10 | #include <linux/ctype.h> |
11 | #include <linux/device-mapper.h> | 11 | #include <linux/device-mapper.h> |
12 | #include <linux/delay.h> | ||
12 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
14 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/log2.h> | 21 | #include <linux/log2.h> |
21 | #include <linux/dm-kcopyd.h> | 22 | #include <linux/dm-kcopyd.h> |
22 | 23 | ||
24 | #include "dm-exception-store.h" | ||
23 | #include "dm-snap.h" | 25 | #include "dm-snap.h" |
24 | #include "dm-bio-list.h" | 26 | #include "dm-bio-list.h" |
25 | 27 | ||
@@ -428,8 +430,13 @@ out: | |||
428 | list_add(&new_e->hash_list, e ? &e->hash_list : l); | 430 | list_add(&new_e->hash_list, e ? &e->hash_list : l); |
429 | } | 431 | } |
430 | 432 | ||
431 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) | 433 | /* |
434 | * Callback used by the exception stores to load exceptions when | ||
435 | * initialising. | ||
436 | */ | ||
437 | static int dm_add_exception(void *context, chunk_t old, chunk_t new) | ||
432 | { | 438 | { |
439 | struct dm_snapshot *s = context; | ||
433 | struct dm_snap_exception *e; | 440 | struct dm_snap_exception *e; |
434 | 441 | ||
435 | e = alloc_exception(); | 442 | e = alloc_exception(); |
@@ -658,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
658 | spin_lock_init(&s->tracked_chunk_lock); | 665 | spin_lock_init(&s->tracked_chunk_lock); |
659 | 666 | ||
660 | /* Metadata must only be loaded into one table at once */ | 667 | /* Metadata must only be loaded into one table at once */ |
661 | r = s->store.read_metadata(&s->store); | 668 | r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); |
662 | if (r < 0) { | 669 | if (r < 0) { |
663 | ti->error = "Failed to read snapshot metadata"; | 670 | ti->error = "Failed to read snapshot metadata"; |
664 | goto bad_load_and_register; | 671 | goto bad_load_and_register; |
@@ -735,7 +742,7 @@ static void snapshot_dtr(struct dm_target *ti) | |||
735 | unregister_snapshot(s); | 742 | unregister_snapshot(s); |
736 | 743 | ||
737 | while (atomic_read(&s->pending_exceptions_count)) | 744 | while (atomic_read(&s->pending_exceptions_count)) |
738 | yield(); | 745 | msleep(1); |
739 | /* | 746 | /* |
740 | * Ensure instructions in mempool_destroy aren't reordered | 747 | * Ensure instructions in mempool_destroy aren't reordered |
741 | * before atomic_read. | 748 | * before atomic_read. |
@@ -888,10 +895,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
888 | 895 | ||
889 | /* | 896 | /* |
890 | * Check for conflicting reads. This is extremely improbable, | 897 | * Check for conflicting reads. This is extremely improbable, |
891 | * so yield() is sufficient and there is no need for a wait queue. | 898 | * so msleep(1) is sufficient and there is no need for a wait queue. |
892 | */ | 899 | */ |
893 | while (__chunk_is_tracked(s, pe->e.old_chunk)) | 900 | while (__chunk_is_tracked(s, pe->e.old_chunk)) |
894 | yield(); | 901 | msleep(1); |
895 | 902 | ||
896 | /* | 903 | /* |
897 | * Add a proper exception, and remove the | 904 | * Add a proper exception, and remove the |
@@ -1404,6 +1411,12 @@ static int __init dm_snapshot_init(void) | |||
1404 | { | 1411 | { |
1405 | int r; | 1412 | int r; |
1406 | 1413 | ||
1414 | r = dm_exception_store_init(); | ||
1415 | if (r) { | ||
1416 | DMERR("Failed to initialize exception stores"); | ||
1417 | return r; | ||
1418 | } | ||
1419 | |||
1407 | r = dm_register_target(&snapshot_target); | 1420 | r = dm_register_target(&snapshot_target); |
1408 | if (r) { | 1421 | if (r) { |
1409 | DMERR("snapshot target register failed %d", r); | 1422 | DMERR("snapshot target register failed %d", r); |
@@ -1452,39 +1465,34 @@ static int __init dm_snapshot_init(void) | |||
1452 | 1465 | ||
1453 | return 0; | 1466 | return 0; |
1454 | 1467 | ||
1455 | bad_pending_pool: | 1468 | bad_pending_pool: |
1456 | kmem_cache_destroy(tracked_chunk_cache); | 1469 | kmem_cache_destroy(tracked_chunk_cache); |
1457 | bad5: | 1470 | bad5: |
1458 | kmem_cache_destroy(pending_cache); | 1471 | kmem_cache_destroy(pending_cache); |
1459 | bad4: | 1472 | bad4: |
1460 | kmem_cache_destroy(exception_cache); | 1473 | kmem_cache_destroy(exception_cache); |
1461 | bad3: | 1474 | bad3: |
1462 | exit_origin_hash(); | 1475 | exit_origin_hash(); |
1463 | bad2: | 1476 | bad2: |
1464 | dm_unregister_target(&origin_target); | 1477 | dm_unregister_target(&origin_target); |
1465 | bad1: | 1478 | bad1: |
1466 | dm_unregister_target(&snapshot_target); | 1479 | dm_unregister_target(&snapshot_target); |
1467 | return r; | 1480 | return r; |
1468 | } | 1481 | } |
1469 | 1482 | ||
1470 | static void __exit dm_snapshot_exit(void) | 1483 | static void __exit dm_snapshot_exit(void) |
1471 | { | 1484 | { |
1472 | int r; | ||
1473 | |||
1474 | destroy_workqueue(ksnapd); | 1485 | destroy_workqueue(ksnapd); |
1475 | 1486 | ||
1476 | r = dm_unregister_target(&snapshot_target); | 1487 | dm_unregister_target(&snapshot_target); |
1477 | if (r) | 1488 | dm_unregister_target(&origin_target); |
1478 | DMERR("snapshot unregister failed %d", r); | ||
1479 | |||
1480 | r = dm_unregister_target(&origin_target); | ||
1481 | if (r) | ||
1482 | DMERR("origin unregister failed %d", r); | ||
1483 | 1489 | ||
1484 | exit_origin_hash(); | 1490 | exit_origin_hash(); |
1485 | kmem_cache_destroy(pending_cache); | 1491 | kmem_cache_destroy(pending_cache); |
1486 | kmem_cache_destroy(exception_cache); | 1492 | kmem_cache_destroy(exception_cache); |
1487 | kmem_cache_destroy(tracked_chunk_cache); | 1493 | kmem_cache_destroy(tracked_chunk_cache); |
1494 | |||
1495 | dm_exception_store_exit(); | ||
1488 | } | 1496 | } |
1489 | 1497 | ||
1490 | /* Module hooks */ | 1498 | /* Module hooks */ |
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 99c0106ede2d..d9e62b43cf85 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h | |||
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * dm-snapshot.c | ||
3 | * | ||
4 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
5 | * | 3 | * |
6 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
@@ -10,6 +8,7 @@ | |||
10 | #define DM_SNAPSHOT_H | 8 | #define DM_SNAPSHOT_H |
11 | 9 | ||
12 | #include <linux/device-mapper.h> | 10 | #include <linux/device-mapper.h> |
11 | #include "dm-exception-store.h" | ||
13 | #include "dm-bio-list.h" | 12 | #include "dm-bio-list.h" |
14 | #include <linux/blkdev.h> | 13 | #include <linux/blkdev.h> |
15 | #include <linux/workqueue.h> | 14 | #include <linux/workqueue.h> |
@@ -20,116 +19,6 @@ struct exception_table { | |||
20 | struct list_head *table; | 19 | struct list_head *table; |
21 | }; | 20 | }; |
22 | 21 | ||
23 | /* | ||
24 | * The snapshot code deals with largish chunks of the disk at a | ||
25 | * time. Typically 32k - 512k. | ||
26 | */ | ||
27 | typedef sector_t chunk_t; | ||
28 | |||
29 | /* | ||
30 | * An exception is used where an old chunk of data has been | ||
31 | * replaced by a new one. | ||
32 | * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number | ||
33 | * of chunks that follow contiguously. Remaining bits hold the number of the | ||
34 | * chunk within the device. | ||
35 | */ | ||
36 | struct dm_snap_exception { | ||
37 | struct list_head hash_list; | ||
38 | |||
39 | chunk_t old_chunk; | ||
40 | chunk_t new_chunk; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Functions to manipulate consecutive chunks | ||
45 | */ | ||
46 | # if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) | ||
47 | # define DM_CHUNK_CONSECUTIVE_BITS 8 | ||
48 | # define DM_CHUNK_NUMBER_BITS 56 | ||
49 | |||
50 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
51 | { | ||
52 | return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); | ||
53 | } | ||
54 | |||
55 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
56 | { | ||
57 | return e->new_chunk >> DM_CHUNK_NUMBER_BITS; | ||
58 | } | ||
59 | |||
60 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
61 | { | ||
62 | e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); | ||
63 | |||
64 | BUG_ON(!dm_consecutive_chunk_count(e)); | ||
65 | } | ||
66 | |||
67 | # else | ||
68 | # define DM_CHUNK_CONSECUTIVE_BITS 0 | ||
69 | |||
70 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
71 | { | ||
72 | return chunk; | ||
73 | } | ||
74 | |||
75 | static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) | ||
76 | { | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) | ||
81 | { | ||
82 | } | ||
83 | |||
84 | # endif | ||
85 | |||
86 | /* | ||
87 | * Abstraction to handle the meta/layout of exception stores (the | ||
88 | * COW device). | ||
89 | */ | ||
90 | struct exception_store { | ||
91 | |||
92 | /* | ||
93 | * Destroys this object when you've finished with it. | ||
94 | */ | ||
95 | void (*destroy) (struct exception_store *store); | ||
96 | |||
97 | /* | ||
98 | * The target shouldn't read the COW device until this is | ||
99 | * called. | ||
100 | */ | ||
101 | int (*read_metadata) (struct exception_store *store); | ||
102 | |||
103 | /* | ||
104 | * Find somewhere to store the next exception. | ||
105 | */ | ||
106 | int (*prepare_exception) (struct exception_store *store, | ||
107 | struct dm_snap_exception *e); | ||
108 | |||
109 | /* | ||
110 | * Update the metadata with this exception. | ||
111 | */ | ||
112 | void (*commit_exception) (struct exception_store *store, | ||
113 | struct dm_snap_exception *e, | ||
114 | void (*callback) (void *, int success), | ||
115 | void *callback_context); | ||
116 | |||
117 | /* | ||
118 | * The snapshot is invalid, note this in the metadata. | ||
119 | */ | ||
120 | void (*drop_snapshot) (struct exception_store *store); | ||
121 | |||
122 | /* | ||
123 | * Return how full the snapshot is. | ||
124 | */ | ||
125 | void (*fraction_full) (struct exception_store *store, | ||
126 | sector_t *numerator, | ||
127 | sector_t *denominator); | ||
128 | |||
129 | struct dm_snapshot *snap; | ||
130 | void *context; | ||
131 | }; | ||
132 | |||
133 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 | 22 | #define DM_TRACKED_CHUNK_HASH_SIZE 16 |
134 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ | 23 | #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ |
135 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) | 24 | (DM_TRACKED_CHUNK_HASH_SIZE - 1)) |
@@ -172,7 +61,7 @@ struct dm_snapshot { | |||
172 | spinlock_t pe_lock; | 61 | spinlock_t pe_lock; |
173 | 62 | ||
174 | /* The on disk metadata handler */ | 63 | /* The on disk metadata handler */ |
175 | struct exception_store store; | 64 | struct dm_exception_store store; |
176 | 65 | ||
177 | struct dm_kcopyd_client *kcopyd_client; | 66 | struct dm_kcopyd_client *kcopyd_client; |
178 | 67 | ||
@@ -187,20 +76,6 @@ struct dm_snapshot { | |||
187 | }; | 76 | }; |
188 | 77 | ||
189 | /* | 78 | /* |
190 | * Used by the exception stores to load exceptions when | ||
191 | * initialising. | ||
192 | */ | ||
193 | int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); | ||
194 | |||
195 | /* | ||
196 | * Constructor and destructor for the default persistent | ||
197 | * store. | ||
198 | */ | ||
199 | int dm_create_persistent(struct exception_store *store); | ||
200 | |||
201 | int dm_create_transient(struct exception_store *store); | ||
202 | |||
203 | /* | ||
204 | * Return the number of sectors in the device. | 79 | * Return the number of sectors in the device. |
205 | */ | 80 | */ |
206 | static inline sector_t get_dev_size(struct block_device *bdev) | 81 | static inline sector_t get_dev_size(struct block_device *bdev) |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e4ef88d421e..41569bc60abc 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -337,9 +337,7 @@ int __init dm_stripe_init(void) | |||
337 | 337 | ||
338 | void dm_stripe_exit(void) | 338 | void dm_stripe_exit(void) |
339 | { | 339 | { |
340 | if (dm_unregister_target(&stripe_target)) | 340 | dm_unregister_target(&stripe_target); |
341 | DMWARN("target unregistration failed"); | ||
342 | |||
343 | destroy_workqueue(kstriped); | 341 | destroy_workqueue(kstriped); |
344 | 342 | ||
345 | return; | 343 | return; |
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c new file mode 100644 index 000000000000..a2a45e6c7c8b --- /dev/null +++ b/drivers/md/dm-sysfs.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc. All rights reserved. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/sysfs.h> | ||
8 | #include <linux/dm-ioctl.h> | ||
9 | #include "dm.h" | ||
10 | |||
11 | struct dm_sysfs_attr { | ||
12 | struct attribute attr; | ||
13 | ssize_t (*show)(struct mapped_device *, char *); | ||
14 | ssize_t (*store)(struct mapped_device *, char *); | ||
15 | }; | ||
16 | |||
17 | #define DM_ATTR_RO(_name) \ | ||
18 | struct dm_sysfs_attr dm_attr_##_name = \ | ||
19 | __ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL) | ||
20 | |||
21 | static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
22 | char *page) | ||
23 | { | ||
24 | struct dm_sysfs_attr *dm_attr; | ||
25 | struct mapped_device *md; | ||
26 | ssize_t ret; | ||
27 | |||
28 | dm_attr = container_of(attr, struct dm_sysfs_attr, attr); | ||
29 | if (!dm_attr->show) | ||
30 | return -EIO; | ||
31 | |||
32 | md = dm_get_from_kobject(kobj); | ||
33 | if (!md) | ||
34 | return -EINVAL; | ||
35 | |||
36 | ret = dm_attr->show(md, page); | ||
37 | dm_put(md); | ||
38 | |||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) | ||
43 | { | ||
44 | if (dm_copy_name_and_uuid(md, buf, NULL)) | ||
45 | return -EIO; | ||
46 | |||
47 | strcat(buf, "\n"); | ||
48 | return strlen(buf); | ||
49 | } | ||
50 | |||
51 | static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) | ||
52 | { | ||
53 | if (dm_copy_name_and_uuid(md, NULL, buf)) | ||
54 | return -EIO; | ||
55 | |||
56 | strcat(buf, "\n"); | ||
57 | return strlen(buf); | ||
58 | } | ||
59 | |||
60 | static DM_ATTR_RO(name); | ||
61 | static DM_ATTR_RO(uuid); | ||
62 | |||
63 | static struct attribute *dm_attrs[] = { | ||
64 | &dm_attr_name.attr, | ||
65 | &dm_attr_uuid.attr, | ||
66 | NULL, | ||
67 | }; | ||
68 | |||
69 | static struct sysfs_ops dm_sysfs_ops = { | ||
70 | .show = dm_attr_show, | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * The dm kobject is embedded in the mapped_device structure, | ||
75 | * so there is no need to define a release function here. | ||
76 | */ | ||
77 | static struct kobj_type dm_ktype = { | ||
78 | .sysfs_ops = &dm_sysfs_ops, | ||
79 | .default_attrs = dm_attrs, | ||
80 | }; | ||
81 | |||
82 | /* | ||
83 | * Initialize the kobj; | ||
84 | * because nobody is using md yet, there is no need for explicit dm_get/put | ||
85 | */ | ||
86 | int dm_sysfs_init(struct mapped_device *md) | ||
87 | { | ||
88 | return kobject_init_and_add(dm_kobject(md), &dm_ktype, | ||
89 | &disk_to_dev(dm_disk(md))->kobj, | ||
90 | "%s", "dm"); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Remove the kobj; called after all references are removed | ||
95 | */ | ||
96 | void dm_sysfs_exit(struct mapped_device *md) | ||
97 | { | ||
98 | kobject_put(dm_kobject(md)); | ||
99 | } | ||
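The net effect is two read-only files under each mapped device's gendisk directory; reading one back from userspace is straightforward (the dm-0 path below is an assumption for whichever device happens to exist):

#include <stdio.h>

int main(void)
{
	/* assumed path: the "dm" kobject added under /sys/block/dm-0 */
	FILE *f = fopen("/sys/block/dm-0/dm/name", "r");
	char buf[128];

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("dm name: %s", buf);
	fclose(f);
	return 0;
}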
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 04e5fd742c2c..2fd66c30f7f8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2001 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
18 | #include <linux/delay.h> | ||
18 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
19 | 20 | ||
20 | #define DM_MSG_PREFIX "table" | 21 | #define DM_MSG_PREFIX "table" |
@@ -24,6 +25,19 @@ | |||
24 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) | 25 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) |
25 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) | 26 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) |
26 | 27 | ||
28 | /* | ||
29 | * The table has always exactly one reference from either mapped_device->map | ||
30 | * or hash_cell->new_map. This reference is not counted in table->holders. | ||
31 | * A pair of dm_create_table/dm_destroy_table functions is used for table | ||
32 | * creation/destruction. | ||
33 | * | ||
34 | * Temporary references from the other code increase table->holders. A pair | ||
35 | * of dm_table_get/dm_table_put functions is used to manipulate it. | ||
36 | * | ||
37 | * When the table is about to be destroyed, we wait for table->holders to | ||
38 | * drop to zero. | ||
39 | */ | ||
40 | |||
27 | struct dm_table { | 41 | struct dm_table { |
28 | struct mapped_device *md; | 42 | struct mapped_device *md; |
29 | atomic_t holders; | 43 | atomic_t holders; |
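The scheme described in the comment above can be modelled in a few lines: the single owning reference is uncounted, temporary users bump holders, and destroy simply waits for the count to drain (msleep(1) in the kernel; a plain spin in this userspace sketch):

#include <stdatomic.h>
#include <stdio.h>

struct table {
	atomic_int holders;	/* temporary references only */
};

static void table_get(struct table *t) { atomic_fetch_add(&t->holders, 1); }
static void table_put(struct table *t) { atomic_fetch_sub(&t->holders, 1); }

static void table_destroy(struct table *t)
{
	while (atomic_load(&t->holders))
		;	/* wait for temporary users to finish */
	printf("freed\n");
}

int main(void)
{
	struct table t;

	atomic_init(&t.holders, 0);
	table_get(&t);
	table_put(&t);
	table_destroy(&t);
	return 0;
}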
@@ -38,6 +52,8 @@ struct dm_table { | |||
38 | sector_t *highs; | 52 | sector_t *highs; |
39 | struct dm_target *targets; | 53 | struct dm_target *targets; |
40 | 54 | ||
55 | unsigned barriers_supported:1; | ||
56 | |||
41 | /* | 57 | /* |
42 | * Indicates the rw permissions for the new logical | 58 | * Indicates the rw permissions for the new logical |
43 | * device. This should be a combination of FMODE_READ | 59 | * device. This should be a combination of FMODE_READ |
@@ -226,7 +242,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode, | |||
226 | return -ENOMEM; | 242 | return -ENOMEM; |
227 | 243 | ||
228 | INIT_LIST_HEAD(&t->devices); | 244 | INIT_LIST_HEAD(&t->devices); |
229 | atomic_set(&t->holders, 1); | 245 | atomic_set(&t->holders, 0); |
246 | t->barriers_supported = 1; | ||
230 | 247 | ||
231 | if (!num_targets) | 248 | if (!num_targets) |
232 | num_targets = KEYS_PER_NODE; | 249 | num_targets = KEYS_PER_NODE; |
@@ -256,10 +273,14 @@ static void free_devices(struct list_head *devices) | |||
256 | } | 273 | } |
257 | } | 274 | } |
258 | 275 | ||
259 | static void table_destroy(struct dm_table *t) | 276 | void dm_table_destroy(struct dm_table *t) |
260 | { | 277 | { |
261 | unsigned int i; | 278 | unsigned int i; |
262 | 279 | ||
280 | while (atomic_read(&t->holders)) | ||
281 | msleep(1); | ||
282 | smp_mb(); | ||
283 | |||
263 | /* free the indexes (see dm_table_complete) */ | 284 | /* free the indexes (see dm_table_complete) */ |
264 | if (t->depth >= 2) | 285 | if (t->depth >= 2) |
265 | vfree(t->index[t->depth - 2]); | 286 | vfree(t->index[t->depth - 2]); |
@@ -297,8 +318,8 @@ void dm_table_put(struct dm_table *t) | |||
297 | if (!t) | 318 | if (!t) |
298 | return; | 319 | return; |
299 | 320 | ||
300 | if (atomic_dec_and_test(&t->holders)) | 321 | smp_mb__before_atomic_dec(); |
301 | table_destroy(t); | 322 | atomic_dec(&t->holders); |
302 | } | 323 | } |
303 | 324 | ||
304 | /* | 325 | /* |
@@ -728,6 +749,10 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
728 | /* FIXME: the plan is to combine high here and then have | 749 | /* FIXME: the plan is to combine high here and then have |
729 | * the merge fn apply the target level restrictions. */ | 750 | * the merge fn apply the target level restrictions. */ |
730 | combine_restrictions_low(&t->limits, &tgt->limits); | 751 | combine_restrictions_low(&t->limits, &tgt->limits); |
752 | |||
753 | if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS)) | ||
754 | t->barriers_supported = 0; | ||
755 | |||
731 | return 0; | 756 | return 0; |
732 | 757 | ||
733 | bad: | 758 | bad: |
@@ -772,6 +797,12 @@ int dm_table_complete(struct dm_table *t) | |||
772 | 797 | ||
773 | check_for_valid_limits(&t->limits); | 798 | check_for_valid_limits(&t->limits); |
774 | 799 | ||
800 | /* | ||
801 | * We only support barriers if there is exactly one underlying device. | ||
802 | */ | ||
803 | if (!list_is_singular(&t->devices)) | ||
804 | t->barriers_supported = 0; | ||
805 | |||
775 | /* how many indexes will the btree have ? */ | 806 | /* how many indexes will the btree have ? */ |
776 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | 807 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); |
777 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | 808 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); |
@@ -986,6 +1017,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) | |||
986 | return t->md; | 1017 | return t->md; |
987 | } | 1018 | } |
988 | 1019 | ||
1020 | int dm_table_barrier_ok(struct dm_table *t) | ||
1021 | { | ||
1022 | return t->barriers_supported; | ||
1023 | } | ||
1024 | EXPORT_SYMBOL(dm_table_barrier_ok); | ||
1025 | |||
989 | EXPORT_SYMBOL(dm_vcalloc); | 1026 | EXPORT_SYMBOL(dm_vcalloc); |
990 | EXPORT_SYMBOL(dm_get_device); | 1027 | EXPORT_SYMBOL(dm_get_device); |
991 | EXPORT_SYMBOL(dm_put_device); | 1028 | EXPORT_SYMBOL(dm_put_device); |
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 835cf95b857f..7decf10006e4 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c | |||
@@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t) | |||
130 | return rv; | 130 | return rv; |
131 | } | 131 | } |
132 | 132 | ||
133 | int dm_unregister_target(struct target_type *t) | 133 | void dm_unregister_target(struct target_type *t) |
134 | { | 134 | { |
135 | struct tt_internal *ti; | 135 | struct tt_internal *ti; |
136 | 136 | ||
137 | down_write(&_lock); | 137 | down_write(&_lock); |
138 | if (!(ti = __find_target_type(t->name))) { | 138 | if (!(ti = __find_target_type(t->name))) { |
139 | up_write(&_lock); | 139 | DMCRIT("Unregistering unrecognised target: %s", t->name); |
140 | return -EINVAL; | 140 | BUG(); |
141 | } | 141 | } |
142 | 142 | ||
143 | if (ti->use) { | 143 | if (ti->use) { |
144 | up_write(&_lock); | 144 | DMCRIT("Attempt to unregister target still in use: %s", |
145 | return -ETXTBSY; | 145 | t->name); |
146 | BUG(); | ||
146 | } | 147 | } |
147 | 148 | ||
148 | list_del(&ti->list); | 149 | list_del(&ti->list); |
149 | kfree(ti); | 150 | kfree(ti); |
150 | 151 | ||
151 | up_write(&_lock); | 152 | up_write(&_lock); |
152 | return 0; | ||
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
@@ -187,8 +187,7 @@ int __init dm_target_init(void) | |||
187 | 187 | ||
188 | void dm_target_exit(void) | 188 | void dm_target_exit(void) |
189 | { | 189 | { |
190 | if (dm_unregister_target(&error_target)) | 190 | dm_unregister_target(&error_target); |
191 | DMWARN("error target unregistration failed"); | ||
192 | } | 191 | } |
193 | 192 | ||
194 | EXPORT_SYMBOL(dm_register_target); | 193 | EXPORT_SYMBOL(dm_register_target); |
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cdbf126ec106..bbc97030c0c2 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c | |||
@@ -69,10 +69,7 @@ static int __init dm_zero_init(void) | |||
69 | 69 | ||
70 | static void __exit dm_zero_exit(void) | 70 | static void __exit dm_zero_exit(void) |
71 | { | 71 | { |
72 | int r = dm_unregister_target(&zero_target); | 72 | dm_unregister_target(&zero_target); |
73 | |||
74 | if (r < 0) | ||
75 | DMERR("unregister failed %d", r); | ||
76 | } | 73 | } |
77 | 74 | ||
78 | module_init(dm_zero_init) | 75 | module_init(dm_zero_init) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c99e4728ff41..51ba1db4b3e7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/idr.h> | 21 | #include <linux/idr.h> |
22 | #include <linux/hdreg.h> | 22 | #include <linux/hdreg.h> |
23 | #include <linux/blktrace_api.h> | 23 | #include <linux/blktrace_api.h> |
24 | #include <trace/block.h> | ||
24 | 25 | ||
25 | #define DM_MSG_PREFIX "core" | 26 | #define DM_MSG_PREFIX "core" |
26 | 27 | ||
@@ -31,6 +32,7 @@ static unsigned int _major = 0; | |||
31 | 32 | ||
32 | static DEFINE_SPINLOCK(_minor_lock); | 33 | static DEFINE_SPINLOCK(_minor_lock); |
33 | /* | 34 | /* |
35 | * For bio-based dm. | ||
34 | * One of these is allocated per bio. | 36 | * One of these is allocated per bio. |
35 | */ | 37 | */ |
36 | struct dm_io { | 38 | struct dm_io { |
@@ -42,6 +44,7 @@ struct dm_io { | |||
42 | }; | 44 | }; |
43 | 45 | ||
44 | /* | 46 | /* |
47 | * For bio-based dm. | ||
45 | * One of these is allocated per target within a bio. Hopefully | 48 | * One of these is allocated per target within a bio. Hopefully |
46 | * this will be simplified out one day. | 49 | * this will be simplified out one day. |
47 | */ | 50 | */ |
@@ -51,6 +54,29 @@ struct dm_target_io { | |||
51 | union map_info info; | 54 | union map_info info; |
52 | }; | 55 | }; |
53 | 56 | ||
57 | DEFINE_TRACE(block_bio_complete); | ||
58 | |||
59 | /* | ||
60 | * For request-based dm. | ||
61 | * One of these is allocated per request. | ||
62 | */ | ||
63 | struct dm_rq_target_io { | ||
64 | struct mapped_device *md; | ||
65 | struct dm_target *ti; | ||
66 | struct request *orig, clone; | ||
67 | int error; | ||
68 | union map_info info; | ||
69 | }; | ||
70 | |||
71 | /* | ||
72 | * For request-based dm. | ||
73 | * One of these is allocated per bio. | ||
74 | */ | ||
75 | struct dm_rq_clone_bio_info { | ||
76 | struct bio *orig; | ||
77 | struct request *rq; | ||
78 | }; | ||
79 | |||
54 | union map_info *dm_get_mapinfo(struct bio *bio) | 80 | union map_info *dm_get_mapinfo(struct bio *bio) |
55 | { | 81 | { |
56 | if (bio && bio->bi_private) | 82 | if (bio && bio->bi_private) |
@@ -141,11 +167,16 @@ struct mapped_device { | |||
141 | 167 | ||
142 | /* forced geometry settings */ | 168 | /* forced geometry settings */ |
143 | struct hd_geometry geometry; | 169 | struct hd_geometry geometry; |
170 | |||
171 | /* sysfs handle */ | ||
172 | struct kobject kobj; | ||
144 | }; | 173 | }; |
145 | 174 | ||
146 | #define MIN_IOS 256 | 175 | #define MIN_IOS 256 |
147 | static struct kmem_cache *_io_cache; | 176 | static struct kmem_cache *_io_cache; |
148 | static struct kmem_cache *_tio_cache; | 177 | static struct kmem_cache *_tio_cache; |
178 | static struct kmem_cache *_rq_tio_cache; | ||
179 | static struct kmem_cache *_rq_bio_info_cache; | ||
149 | 180 | ||
150 | static int __init local_init(void) | 181 | static int __init local_init(void) |
151 | { | 182 | { |
@@ -161,9 +192,17 @@ static int __init local_init(void) | |||
161 | if (!_tio_cache) | 192 | if (!_tio_cache) |
162 | goto out_free_io_cache; | 193 | goto out_free_io_cache; |
163 | 194 | ||
195 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); | ||
196 | if (!_rq_tio_cache) | ||
197 | goto out_free_tio_cache; | ||
198 | |||
199 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); | ||
200 | if (!_rq_bio_info_cache) | ||
201 | goto out_free_rq_tio_cache; | ||
202 | |||
164 | r = dm_uevent_init(); | 203 | r = dm_uevent_init(); |
165 | if (r) | 204 | if (r) |
166 | goto out_free_tio_cache; | 205 | goto out_free_rq_bio_info_cache; |
167 | 206 | ||
168 | _major = major; | 207 | _major = major; |
169 | r = register_blkdev(_major, _name); | 208 | r = register_blkdev(_major, _name); |
@@ -177,6 +216,10 @@ static int __init local_init(void) | |||
177 | 216 | ||
178 | out_uevent_exit: | 217 | out_uevent_exit: |
179 | dm_uevent_exit(); | 218 | dm_uevent_exit(); |
219 | out_free_rq_bio_info_cache: | ||
220 | kmem_cache_destroy(_rq_bio_info_cache); | ||
221 | out_free_rq_tio_cache: | ||
222 | kmem_cache_destroy(_rq_tio_cache); | ||
180 | out_free_tio_cache: | 223 | out_free_tio_cache: |
181 | kmem_cache_destroy(_tio_cache); | 224 | kmem_cache_destroy(_tio_cache); |
182 | out_free_io_cache: | 225 | out_free_io_cache: |
@@ -187,6 +230,8 @@ out_free_io_cache: | |||
187 | 230 | ||
188 | static void local_exit(void) | 231 | static void local_exit(void) |
189 | { | 232 | { |
233 | kmem_cache_destroy(_rq_bio_info_cache); | ||
234 | kmem_cache_destroy(_rq_tio_cache); | ||
190 | kmem_cache_destroy(_tio_cache); | 235 | kmem_cache_destroy(_tio_cache); |
191 | kmem_cache_destroy(_io_cache); | 236 | kmem_cache_destroy(_io_cache); |
192 | unregister_blkdev(_major, _name); | 237 | unregister_blkdev(_major, _name); |
@@ -504,8 +549,7 @@ static void dec_pending(struct dm_io *io, int error) | |||
504 | end_io_acct(io); | 549 | end_io_acct(io); |
505 | 550 | ||
506 | if (io->error != DM_ENDIO_REQUEUE) { | 551 | if (io->error != DM_ENDIO_REQUEUE) { |
507 | blk_add_trace_bio(io->md->queue, io->bio, | 552 | trace_block_bio_complete(io->md->queue, io->bio); |
508 | BLK_TA_COMPLETE); | ||
509 | 553 | ||
510 | bio_endio(io->bio, io->error); | 554 | bio_endio(io->bio, io->error); |
511 | } | 555 | } |
@@ -598,7 +642,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
598 | if (r == DM_MAPIO_REMAPPED) { | 642 | if (r == DM_MAPIO_REMAPPED) { |
599 | /* the bio has been remapped so dispatch it */ | 643 | /* the bio has been remapped so dispatch it */ |
600 | 644 | ||
601 | blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, | 645 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, |
602 | tio->io->bio->bi_bdev->bd_dev, | 646 | tio->io->bio->bi_bdev->bd_dev, |
603 | clone->bi_sector, sector); | 647 | clone->bi_sector, sector); |
604 | 648 | ||
@@ -794,7 +838,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) | |||
794 | ci.map = dm_get_table(md); | 838 | ci.map = dm_get_table(md); |
795 | if (unlikely(!ci.map)) | 839 | if (unlikely(!ci.map)) |
796 | return -EIO; | 840 | return -EIO; |
797 | 841 | if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { | |
842 | dm_table_put(ci.map); | ||
843 | bio_endio(bio, -EOPNOTSUPP); | ||
844 | return 0; | ||
845 | } | ||
798 | ci.md = md; | 846 | ci.md = md; |
799 | ci.bio = bio; | 847 | ci.bio = bio; |
800 | ci.io = alloc_io(md); | 848 | ci.io = alloc_io(md); |
@@ -878,15 +926,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
878 | struct mapped_device *md = q->queuedata; | 926 | struct mapped_device *md = q->queuedata; |
879 | int cpu; | 927 | int cpu; |
880 | 928 | ||
881 | /* | ||
882 | * There is no use in forwarding any barrier request since we can't | ||
883 | * guarantee it is (or can be) handled by the targets correctly. | ||
884 | */ | ||
885 | if (unlikely(bio_barrier(bio))) { | ||
886 | bio_endio(bio, -EOPNOTSUPP); | ||
887 | return 0; | ||
888 | } | ||
889 | |||
890 | down_read(&md->io_lock); | 929 | down_read(&md->io_lock); |
891 | 930 | ||
892 | cpu = part_stat_lock(); | 931 | cpu = part_stat_lock(); |
@@ -941,8 +980,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
941 | struct mapped_device *md = congested_data; | 980 | struct mapped_device *md = congested_data; |
942 | struct dm_table *map; | 981 | struct dm_table *map; |
943 | 982 | ||
944 | atomic_inc(&md->pending); | ||
945 | |||
946 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | 983 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { |
947 | map = dm_get_table(md); | 984 | map = dm_get_table(md); |
948 | if (map) { | 985 | if (map) { |
@@ -951,10 +988,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
951 | } | 988 | } |
952 | } | 989 | } |
953 | 990 | ||
954 | if (!atomic_dec_return(&md->pending)) | ||
955 | /* nudge anyone waiting on suspend queue */ | ||
956 | wake_up(&md->wait); | ||
957 | |||
958 | return r; | 991 | return r; |
959 | } | 992 | } |
960 | 993 | ||
@@ -1091,7 +1124,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
1091 | if (!md->tio_pool) | 1124 | if (!md->tio_pool) |
1092 | goto bad_tio_pool; | 1125 | goto bad_tio_pool; |
1093 | 1126 | ||
1094 | md->bs = bioset_create(16, 16); | 1127 | md->bs = bioset_create(16, 0); |
1095 | if (!md->bs) | 1128 | if (!md->bs) |
1096 | goto bad_no_bioset; | 1129 | goto bad_no_bioset; |
1097 | 1130 | ||
@@ -1214,10 +1247,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1214 | 1247 | ||
1215 | if (md->suspended_bdev) | 1248 | if (md->suspended_bdev) |
1216 | __set_size(md, size); | 1249 | __set_size(md, size); |
1217 | if (size == 0) | 1250 | |
1251 | if (!size) { | ||
1252 | dm_table_destroy(t); | ||
1218 | return 0; | 1253 | return 0; |
1254 | } | ||
1219 | 1255 | ||
1220 | dm_table_get(t); | ||
1221 | dm_table_event_callback(t, event_callback, md); | 1256 | dm_table_event_callback(t, event_callback, md); |
1222 | 1257 | ||
1223 | write_lock(&md->map_lock); | 1258 | write_lock(&md->map_lock); |
@@ -1239,7 +1274,7 @@ static void __unbind(struct mapped_device *md) | |||
1239 | write_lock(&md->map_lock); | 1274 | write_lock(&md->map_lock); |
1240 | md->map = NULL; | 1275 | md->map = NULL; |
1241 | write_unlock(&md->map_lock); | 1276 | write_unlock(&md->map_lock); |
1242 | dm_table_put(map); | 1277 | dm_table_destroy(map); |
1243 | } | 1278 | } |
1244 | 1279 | ||
1245 | /* | 1280 | /* |
@@ -1253,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result) | |||
1253 | if (!md) | 1288 | if (!md) |
1254 | return -ENXIO; | 1289 | return -ENXIO; |
1255 | 1290 | ||
1291 | dm_sysfs_init(md); | ||
1292 | |||
1256 | *result = md; | 1293 | *result = md; |
1257 | return 0; | 1294 | return 0; |
1258 | } | 1295 | } |
@@ -1328,8 +1365,9 @@ void dm_put(struct mapped_device *md) | |||
1328 | dm_table_presuspend_targets(map); | 1365 | dm_table_presuspend_targets(map); |
1329 | dm_table_postsuspend_targets(map); | 1366 | dm_table_postsuspend_targets(map); |
1330 | } | 1367 | } |
1331 | __unbind(md); | 1368 | dm_sysfs_exit(md); |
1332 | dm_table_put(map); | 1369 | dm_table_put(map); |
1370 | __unbind(md); | ||
1333 | free_dev(md); | 1371 | free_dev(md); |
1334 | } | 1372 | } |
1335 | } | 1373 | } |
@@ -1667,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
1667 | return md->disk; | 1705 | return md->disk; |
1668 | } | 1706 | } |
1669 | 1707 | ||
1708 | struct kobject *dm_kobject(struct mapped_device *md) | ||
1709 | { | ||
1710 | return &md->kobj; | ||
1711 | } | ||
1712 | |||
1713 | /* | ||
1714 | * struct mapped_device should not be exported outside of dm.c | ||
1715 | * so use this check to verify that kobj is part of the md structure | ||
1716 | */ | ||
1717 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | ||
1718 | { | ||
1719 | struct mapped_device *md; | ||
1720 | |||
1721 | md = container_of(kobj, struct mapped_device, kobj); | ||
1722 | if (&md->kobj != kobj) | ||
1723 | return NULL; | ||
1724 | |||
1725 | dm_get(md); | ||
1726 | return md; | ||
1727 | } | ||
1728 | |||
1670 | int dm_suspended(struct mapped_device *md) | 1729 | int dm_suspended(struct mapped_device *md) |
1671 | { | 1730 | { |
1672 | return test_bit(DMF_SUSPENDED, &md->flags); | 1731 | return test_bit(DMF_SUSPENDED, &md->flags); |
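dm_get_from_kobject() relies on the kobject being embedded in the mapped_device, so container_of() recovers the enclosing structure from the member pointer. A self-contained illustration of the same trick:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { int refcount; };

struct mapped_device {
	int flags;
	struct kobject kobj;	/* embedded, as in dm.c */
};

int main(void)
{
	struct mapped_device md = { .flags = 7 };
	struct kobject *kobj = &md.kobj;
	struct mapped_device *back =
		container_of(kobj, struct mapped_device, kobj);

	printf("flags=%d\n", back->flags);	/* 7 */
	return 0;
}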
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 0ade60cdef42..20194e000c5a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -36,6 +36,7 @@ struct dm_table; | |||
36 | /*----------------------------------------------------------------- | 36 | /*----------------------------------------------------------------- |
37 | * Internal table functions. | 37 | * Internal table functions. |
38 | *---------------------------------------------------------------*/ | 38 | *---------------------------------------------------------------*/ |
39 | void dm_table_destroy(struct dm_table *t); | ||
39 | void dm_table_event_callback(struct dm_table *t, | 40 | void dm_table_event_callback(struct dm_table *t, |
40 | void (*fn)(void *), void *context); | 41 | void (*fn)(void *), void *context); |
41 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); | 42 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); |
@@ -51,6 +52,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits); | |||
51 | * To check the return value from dm_table_find_target(). | 52 | * To check the return value from dm_table_find_target(). |
52 | */ | 53 | */ |
53 | #define dm_target_is_valid(t) ((t)->table) | 54 | #define dm_target_is_valid(t) ((t)->table) |
55 | int dm_table_barrier_ok(struct dm_table *t); | ||
54 | 56 | ||
55 | /*----------------------------------------------------------------- | 57 | /*----------------------------------------------------------------- |
56 | * A registry of target types. | 58 | * A registry of target types. |
@@ -72,6 +74,14 @@ int dm_interface_init(void); | |||
72 | void dm_interface_exit(void); | 74 | void dm_interface_exit(void); |
73 | 75 | ||
74 | /* | 76 | /* |
77 | * sysfs interface | ||
78 | */ | ||
79 | int dm_sysfs_init(struct mapped_device *md); | ||
80 | void dm_sysfs_exit(struct mapped_device *md); | ||
81 | struct kobject *dm_kobject(struct mapped_device *md); | ||
82 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); | ||
83 | |||
84 | /* | ||
75 | * Targets for linear and striped mappings | 85 | * Targets for linear and striped mappings |
76 | */ | 86 | */ |
77 | int dm_linear_init(void); | 87 | int dm_linear_init(void); |
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index f26c1f9a475b..86d9adf90e79 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c | |||
@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size) | |||
283 | static int run(mddev_t *mddev) | 283 | static int run(mddev_t *mddev) |
284 | { | 284 | { |
285 | mdk_rdev_t *rdev; | 285 | mdk_rdev_t *rdev; |
286 | struct list_head *tmp; | ||
287 | int i; | 286 | int i; |
288 | 287 | ||
289 | conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); | 288 | conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); |
@@ -296,7 +295,7 @@ static int run(mddev_t *mddev) | |||
296 | } | 295 | } |
297 | conf->nfaults = 0; | 296 | conf->nfaults = 0; |
298 | 297 | ||
299 | rdev_for_each(rdev, tmp, mddev) | 298 | list_for_each_entry(rdev, &mddev->disks, same_set) |
300 | conf->rdev = rdev; | 299 | conf->rdev = rdev; |
301 | 300 | ||
302 | mddev->array_sectors = mddev->size * 2; | 301 | mddev->array_sectors = mddev->size * 2; |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3b90c5c924ec..1e3aea9eecf1 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
105 | int i, nb_zone, cnt; | 105 | int i, nb_zone, cnt; |
106 | sector_t min_sectors; | 106 | sector_t min_sectors; |
107 | sector_t curr_sector; | 107 | sector_t curr_sector; |
108 | struct list_head *tmp; | ||
109 | 108 | ||
110 | conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), | 109 | conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), |
111 | GFP_KERNEL); | 110 | GFP_KERNEL); |
@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
115 | cnt = 0; | 114 | cnt = 0; |
116 | conf->array_sectors = 0; | 115 | conf->array_sectors = 0; |
117 | 116 | ||
118 | rdev_for_each(rdev, tmp, mddev) { | 117 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
119 | int j = rdev->raid_disk; | 118 | int j = rdev->raid_disk; |
120 | dev_info_t *disk = conf->disks + j; | 119 | dev_info_t *disk = conf->disks + j; |
121 | 120 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b1d32694f6f..41e2509bf896 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -214,20 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev) | |||
214 | return mddev; | 214 | return mddev; |
215 | } | 215 | } |
216 | 216 | ||
217 | static void mddev_delayed_delete(struct work_struct *ws) | ||
218 | { | ||
219 | mddev_t *mddev = container_of(ws, mddev_t, del_work); | ||
220 | kobject_del(&mddev->kobj); | ||
221 | kobject_put(&mddev->kobj); | ||
222 | } | ||
223 | |||
217 | static void mddev_put(mddev_t *mddev) | 224 | static void mddev_put(mddev_t *mddev) |
218 | { | 225 | { |
219 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) | 226 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) |
220 | return; | 227 | return; |
221 | if (!mddev->raid_disks && list_empty(&mddev->disks)) { | 228 | if (!mddev->raid_disks && list_empty(&mddev->disks) && |
229 | !mddev->hold_active) { | ||
222 | list_del(&mddev->all_mddevs); | 230 | list_del(&mddev->all_mddevs); |
223 | spin_unlock(&all_mddevs_lock); | 231 | if (mddev->gendisk) { |
224 | blk_cleanup_queue(mddev->queue); | 232 | /* we did a probe so need to clean up. |
225 | if (mddev->sysfs_state) | 233 | * Call schedule_work inside the spinlock |
226 | sysfs_put(mddev->sysfs_state); | 234 | * so that flush_scheduled_work() after |
227 | mddev->sysfs_state = NULL; | 235 | * mddev_find will succeed in waiting for the |
228 | kobject_put(&mddev->kobj); | 236 | * work to be done. |
229 | } else | 237 | */ |
230 | spin_unlock(&all_mddevs_lock); | 238 | INIT_WORK(&mddev->del_work, mddev_delayed_delete); |
239 | schedule_work(&mddev->del_work); | ||
240 | } else | ||
241 | kfree(mddev); | ||
242 | } | ||
243 | spin_unlock(&all_mddevs_lock); | ||
231 | } | 244 | } |
232 | 245 | ||
233 | static mddev_t * mddev_find(dev_t unit) | 246 | static mddev_t * mddev_find(dev_t unit) |
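The comment in mddev_put() is the crux of the new teardown scheme: the work item must be queued while all_mddevs_lock is still held, so that a later flush_scheduled_work() can serve as a barrier; once it returns, any delayed delete queued before the lock was dropped has finished. Roughly, with last_holder() standing in (hypothetically) for the refcount-and-list checks above:

	static void put_side(mddev_t *mddev)
	{
		spin_lock(&all_mddevs_lock);
		if (last_holder(mddev)) {		/* hypothetical predicate */
			list_del(&mddev->all_mddevs);
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			schedule_work(&mddev->del_work);/* queued before unlock */
		}
		spin_unlock(&all_mddevs_lock);
		/* kobject_del()/kobject_put() now run in process context,
		 * outside the spinlock, where sleeping is allowed. */
	}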
@@ -236,15 +249,50 @@ static mddev_t * mddev_find(dev_t unit) | |||
236 | 249 | ||
237 | retry: | 250 | retry: |
238 | spin_lock(&all_mddevs_lock); | 251 | spin_lock(&all_mddevs_lock); |
239 | list_for_each_entry(mddev, &all_mddevs, all_mddevs) | 252 | |
240 | if (mddev->unit == unit) { | 253 | if (unit) { |
241 | mddev_get(mddev); | 254 | list_for_each_entry(mddev, &all_mddevs, all_mddevs) |
255 | if (mddev->unit == unit) { | ||
256 | mddev_get(mddev); | ||
257 | spin_unlock(&all_mddevs_lock); | ||
258 | kfree(new); | ||
259 | return mddev; | ||
260 | } | ||
261 | |||
262 | if (new) { | ||
263 | list_add(&new->all_mddevs, &all_mddevs); | ||
242 | spin_unlock(&all_mddevs_lock); | 264 | spin_unlock(&all_mddevs_lock); |
243 | kfree(new); | 265 | new->hold_active = UNTIL_IOCTL; |
244 | return mddev; | 266 | return new; |
245 | } | 267 | } |
246 | 268 | } else if (new) { | |
247 | if (new) { | 269 | /* find an unused unit number */ |
270 | static int next_minor = 512; | ||
271 | int start = next_minor; | ||
272 | int is_free = 0; | ||
273 | int dev = 0; | ||
274 | while (!is_free) { | ||
275 | dev = MKDEV(MD_MAJOR, next_minor); | ||
276 | next_minor++; | ||
277 | if (next_minor > MINORMASK) | ||
278 | next_minor = 0; | ||
279 | if (next_minor == start) { | ||
280 | /* Oh dear, all in use. */ | ||
281 | spin_unlock(&all_mddevs_lock); | ||
282 | kfree(new); | ||
283 | return NULL; | ||
284 | } | ||
285 | |||
286 | is_free = 1; | ||
287 | list_for_each_entry(mddev, &all_mddevs, all_mddevs) | ||
288 | if (mddev->unit == dev) { | ||
289 | is_free = 0; | ||
290 | break; | ||
291 | } | ||
292 | } | ||
293 | new->unit = dev; | ||
294 | new->md_minor = MINOR(dev); | ||
295 | new->hold_active = UNTIL_STOP; | ||
248 | list_add(&new->all_mddevs, &all_mddevs); | 296 | list_add(&new->all_mddevs, &all_mddevs); |
249 | spin_unlock(&all_mddevs_lock); | 297 | spin_unlock(&all_mddevs_lock); |
250 | return new; | 298 | return new; |
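When no unit is requested, the allocator hands out minors round-robin from 512 upward, keeping auto-allocated arrays clear of the classic md0..md511 namespace; it wraps at MINORMASK and gives up only after a full cycle. The same scan as a condensed sketch (minor_in_use() is a hypothetical stand-in for the walk of all_mddevs):

	static int find_free_minor(int *next_minor)
	{
		int start = *next_minor;

		do {
			int m = (*next_minor)++;

			if (*next_minor > MINORMASK)
				*next_minor = 0;	/* wrap around */
			if (!minor_in_use(m))		/* hypothetical lookup */
				return m;
		} while (*next_minor != start);

		return -1;				/* every minor is taken */
	}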
@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit) | |||
275 | new->resync_max = MaxSector; | 323 | new->resync_max = MaxSector; |
276 | new->level = LEVEL_NONE; | 324 | new->level = LEVEL_NONE; |
277 | 325 | ||
278 | new->queue = blk_alloc_queue(GFP_KERNEL); | ||
279 | if (!new->queue) { | ||
280 | kfree(new); | ||
281 | return NULL; | ||
282 | } | ||
283 | /* Can be unlocked because the queue is new: no concurrency */ | ||
284 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); | ||
285 | |||
286 | blk_queue_make_request(new->queue, md_fail_request); | ||
287 | |||
288 | goto retry; | 326 | goto retry; |
289 | } | 327 | } |
290 | 328 | ||
@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev) | |||
307 | 345 | ||
308 | static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) | 346 | static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) |
309 | { | 347 | { |
310 | mdk_rdev_t * rdev; | 348 | mdk_rdev_t *rdev; |
311 | struct list_head *tmp; | ||
312 | 349 | ||
313 | rdev_for_each(rdev, tmp, mddev) { | 350 | list_for_each_entry(rdev, &mddev->disks, same_set) |
314 | if (rdev->desc_nr == nr) | 351 | if (rdev->desc_nr == nr) |
315 | return rdev; | 352 | return rdev; |
316 | } | 353 | |
317 | return NULL; | 354 | return NULL; |
318 | } | 355 | } |
319 | 356 | ||
320 | static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) | 357 | static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) |
321 | { | 358 | { |
322 | struct list_head *tmp; | ||
323 | mdk_rdev_t *rdev; | 359 | mdk_rdev_t *rdev; |
324 | 360 | ||
325 | rdev_for_each(rdev, tmp, mddev) { | 361 | list_for_each_entry(rdev, &mddev->disks, same_set) |
326 | if (rdev->bdev->bd_dev == dev) | 362 | if (rdev->bdev->bd_dev == dev) |
327 | return rdev; | 363 | return rdev; |
328 | } | 364 | |
329 | return NULL; | 365 | return NULL; |
330 | } | 366 | } |
331 | 367 | ||
@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
861 | static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | 897 | static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) |
862 | { | 898 | { |
863 | mdp_super_t *sb; | 899 | mdp_super_t *sb; |
864 | struct list_head *tmp; | ||
865 | mdk_rdev_t *rdev2; | 900 | mdk_rdev_t *rdev2; |
866 | int next_spare = mddev->raid_disks; | 901 | int next_spare = mddev->raid_disks; |
867 | 902 | ||
@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
933 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | 968 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); |
934 | 969 | ||
935 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 970 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
936 | rdev_for_each(rdev2, tmp, mddev) { | 971 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
937 | mdp_disk_t *d; | 972 | mdp_disk_t *d; |
938 | int desc_nr; | 973 | int desc_nr; |
939 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) | 974 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) |
@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1259 | static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | 1294 | static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) |
1260 | { | 1295 | { |
1261 | struct mdp_superblock_1 *sb; | 1296 | struct mdp_superblock_1 *sb; |
1262 | struct list_head *tmp; | ||
1263 | mdk_rdev_t *rdev2; | 1297 | mdk_rdev_t *rdev2; |
1264 | int max_dev, i; | 1298 | int max_dev, i; |
1265 | /* make rdev->sb match mddev and rdev data. */ | 1299 | /* make rdev->sb match mddev and rdev data. */ |
@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1307 | } | 1341 | } |
1308 | 1342 | ||
1309 | max_dev = 0; | 1343 | max_dev = 0; |
1310 | rdev_for_each(rdev2, tmp, mddev) | 1344 | list_for_each_entry(rdev2, &mddev->disks, same_set) |
1311 | if (rdev2->desc_nr+1 > max_dev) | 1345 | if (rdev2->desc_nr+1 > max_dev) |
1312 | max_dev = rdev2->desc_nr+1; | 1346 | max_dev = rdev2->desc_nr+1; |
1313 | 1347 | ||
@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1316 | for (i=0; i<max_dev;i++) | 1350 | for (i=0; i<max_dev;i++) |
1317 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1351 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1318 | 1352 | ||
1319 | rdev_for_each(rdev2, tmp, mddev) { | 1353 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
1320 | i = rdev2->desc_nr; | 1354 | i = rdev2->desc_nr; |
1321 | if (test_bit(Faulty, &rdev2->flags)) | 1355 | if (test_bit(Faulty, &rdev2->flags)) |
1322 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1356 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1466 | 1500 | ||
1467 | list_add_rcu(&rdev->same_set, &mddev->disks); | 1501 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1468 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1502 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
1503 | |||
1504 | /* May as well allow recovery to be retried once */ | ||
1505 | mddev->recovery_disabled = 0; | ||
1469 | return 0; | 1506 | return 0; |
1470 | 1507 | ||
1471 | fail: | 1508 | fail: |
@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev) | |||
1571 | 1608 | ||
1572 | static void export_array(mddev_t *mddev) | 1609 | static void export_array(mddev_t *mddev) |
1573 | { | 1610 | { |
1574 | struct list_head *tmp; | 1611 | mdk_rdev_t *rdev, *tmp; |
1575 | mdk_rdev_t *rdev; | ||
1576 | 1612 | ||
1577 | rdev_for_each(rdev, tmp, mddev) { | 1613 | rdev_for_each(rdev, tmp, mddev) { |
1578 | if (!rdev->mddev) { | 1614 | if (!rdev->mddev) { |
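export_array() keeps rdev_for_each() because it unlinks entries while walking; only the cursor changes type, from struct list_head * to mdk_rdev_t *. That implies the iterator macros were rebuilt on list_for_each_entry_safe(); presumably (in md_k.h) they now read roughly:

	#define rdev_for_each_list(rdev, tmp, head)				\
		list_for_each_entry_safe(rdev, tmp, head, same_set)

	#define rdev_for_each(rdev, tmp, mddev)					\
		list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

Plain, non-deleting walks call list_for_each_entry() directly, which is what most hunks in this patch convert to.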
@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc) | |||
1593 | desc->major,desc->minor,desc->raid_disk,desc->state); | 1629 | desc->major,desc->minor,desc->raid_disk,desc->state); |
1594 | } | 1630 | } |
1595 | 1631 | ||
1596 | static void print_sb(mdp_super_t *sb) | 1632 | static void print_sb_90(mdp_super_t *sb) |
1597 | { | 1633 | { |
1598 | int i; | 1634 | int i; |
1599 | 1635 | ||
@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb) | |||
1624 | } | 1660 | } |
1625 | printk(KERN_INFO "md: THIS: "); | 1661 | printk(KERN_INFO "md: THIS: "); |
1626 | print_desc(&sb->this_disk); | 1662 | print_desc(&sb->this_disk); |
1627 | |||
1628 | } | 1663 | } |
1629 | 1664 | ||
1630 | static void print_rdev(mdk_rdev_t *rdev) | 1665 | static void print_sb_1(struct mdp_superblock_1 *sb) |
1666 | { | ||
1667 | __u8 *uuid; | ||
1668 | |||
1669 | uuid = sb->set_uuid; | ||
1670 | printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" | ||
1671 | ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" | ||
1672 | KERN_INFO "md: Name: \"%s\" CT:%llu\n", | ||
1673 | le32_to_cpu(sb->major_version), | ||
1674 | le32_to_cpu(sb->feature_map), | ||
1675 | uuid[0], uuid[1], uuid[2], uuid[3], | ||
1676 | uuid[4], uuid[5], uuid[6], uuid[7], | ||
1677 | uuid[8], uuid[9], uuid[10], uuid[11], | ||
1678 | uuid[12], uuid[13], uuid[14], uuid[15], | ||
1679 | sb->set_name, | ||
1680 | (unsigned long long)le64_to_cpu(sb->ctime) | ||
1681 | & MD_SUPERBLOCK_1_TIME_SEC_MASK); | ||
1682 | |||
1683 | uuid = sb->device_uuid; | ||
1684 | printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" | ||
1685 | " RO:%llu\n" | ||
1686 | KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" | ||
1687 | ":%02x%02x%02x%02x%02x%02x\n" | ||
1688 | KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" | ||
1689 | KERN_INFO "md: (MaxDev:%u) \n", | ||
1690 | le32_to_cpu(sb->level), | ||
1691 | (unsigned long long)le64_to_cpu(sb->size), | ||
1692 | le32_to_cpu(sb->raid_disks), | ||
1693 | le32_to_cpu(sb->layout), | ||
1694 | le32_to_cpu(sb->chunksize), | ||
1695 | (unsigned long long)le64_to_cpu(sb->data_offset), | ||
1696 | (unsigned long long)le64_to_cpu(sb->data_size), | ||
1697 | (unsigned long long)le64_to_cpu(sb->super_offset), | ||
1698 | (unsigned long long)le64_to_cpu(sb->recovery_offset), | ||
1699 | le32_to_cpu(sb->dev_number), | ||
1700 | uuid[0], uuid[1], uuid[2], uuid[3], | ||
1701 | uuid[4], uuid[5], uuid[6], uuid[7], | ||
1702 | uuid[8], uuid[9], uuid[10], uuid[11], | ||
1703 | uuid[12], uuid[13], uuid[14], uuid[15], | ||
1704 | sb->devflags, | ||
1705 | (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, | ||
1706 | (unsigned long long)le64_to_cpu(sb->events), | ||
1707 | (unsigned long long)le64_to_cpu(sb->resync_offset), | ||
1708 | le32_to_cpu(sb->sb_csum), | ||
1709 | le32_to_cpu(sb->max_dev) | ||
1710 | ); | ||
1711 | } | ||
1712 | |||
1713 | static void print_rdev(mdk_rdev_t *rdev, int major_version) | ||
1631 | { | 1714 | { |
1632 | char b[BDEVNAME_SIZE]; | 1715 | char b[BDEVNAME_SIZE]; |
1633 | printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", | 1716 | printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", |
@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev) | |||
1635 | test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), | 1718 | test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), |
1636 | rdev->desc_nr); | 1719 | rdev->desc_nr); |
1637 | if (rdev->sb_loaded) { | 1720 | if (rdev->sb_loaded) { |
1638 | printk(KERN_INFO "md: rdev superblock:\n"); | 1721 | printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); |
1639 | print_sb((mdp_super_t*)page_address(rdev->sb_page)); | 1722 | switch (major_version) { |
1723 | case 0: | ||
1724 | print_sb_90((mdp_super_t*)page_address(rdev->sb_page)); | ||
1725 | break; | ||
1726 | case 1: | ||
1727 | print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page)); | ||
1728 | break; | ||
1729 | } | ||
1640 | } else | 1730 | } else |
1641 | printk(KERN_INFO "md: no rdev superblock!\n"); | 1731 | printk(KERN_INFO "md: no rdev superblock!\n"); |
1642 | } | 1732 | } |
1643 | 1733 | ||
1644 | static void md_print_devices(void) | 1734 | static void md_print_devices(void) |
1645 | { | 1735 | { |
1646 | struct list_head *tmp, *tmp2; | 1736 | struct list_head *tmp; |
1647 | mdk_rdev_t *rdev; | 1737 | mdk_rdev_t *rdev; |
1648 | mddev_t *mddev; | 1738 | mddev_t *mddev; |
1649 | char b[BDEVNAME_SIZE]; | 1739 | char b[BDEVNAME_SIZE]; |
@@ -1658,12 +1748,12 @@ static void md_print_devices(void) | |||
1658 | bitmap_print_sb(mddev->bitmap); | 1748 | bitmap_print_sb(mddev->bitmap); |
1659 | else | 1749 | else |
1660 | printk("%s: ", mdname(mddev)); | 1750 | printk("%s: ", mdname(mddev)); |
1661 | rdev_for_each(rdev, tmp2, mddev) | 1751 | list_for_each_entry(rdev, &mddev->disks, same_set) |
1662 | printk("<%s>", bdevname(rdev->bdev,b)); | 1752 | printk("<%s>", bdevname(rdev->bdev,b)); |
1663 | printk("\n"); | 1753 | printk("\n"); |
1664 | 1754 | ||
1665 | rdev_for_each(rdev, tmp2, mddev) | 1755 | list_for_each_entry(rdev, &mddev->disks, same_set) |
1666 | print_rdev(rdev); | 1756 | print_rdev(rdev, mddev->major_version); |
1667 | } | 1757 | } |
1668 | printk("md: **********************************\n"); | 1758 | printk("md: **********************************\n"); |
1669 | printk("\n"); | 1759 | printk("\n"); |
@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares) | |||
1679 | * with the rest of the array) | 1769 | * with the rest of the array) |
1680 | */ | 1770 | */ |
1681 | mdk_rdev_t *rdev; | 1771 | mdk_rdev_t *rdev; |
1682 | struct list_head *tmp; | ||
1683 | 1772 | ||
1684 | rdev_for_each(rdev, tmp, mddev) { | 1773 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
1685 | if (rdev->sb_events == mddev->events || | 1774 | if (rdev->sb_events == mddev->events || |
1686 | (nospares && | 1775 | (nospares && |
1687 | rdev->raid_disk < 0 && | 1776 | rdev->raid_disk < 0 && |
@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) | |||
1699 | 1788 | ||
1700 | static void md_update_sb(mddev_t * mddev, int force_change) | 1789 | static void md_update_sb(mddev_t * mddev, int force_change) |
1701 | { | 1790 | { |
1702 | struct list_head *tmp; | ||
1703 | mdk_rdev_t *rdev; | 1791 | mdk_rdev_t *rdev; |
1704 | int sync_req; | 1792 | int sync_req; |
1705 | int nospares = 0; | 1793 | int nospares = 0; |
@@ -1790,7 +1878,7 @@ repeat: | |||
1790 | mdname(mddev),mddev->in_sync); | 1878 | mdname(mddev),mddev->in_sync); |
1791 | 1879 | ||
1792 | bitmap_update_sb(mddev->bitmap); | 1880 | bitmap_update_sb(mddev->bitmap); |
1793 | rdev_for_each(rdev, tmp, mddev) { | 1881 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
1794 | char b[BDEVNAME_SIZE]; | 1882 | char b[BDEVNAME_SIZE]; |
1795 | dprintk(KERN_INFO "md: "); | 1883 | dprintk(KERN_INFO "md: "); |
1796 | if (rdev->sb_loaded != 1) | 1884 | if (rdev->sb_loaded != 1) |
@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1999 | md_wakeup_thread(rdev->mddev->thread); | 2087 | md_wakeup_thread(rdev->mddev->thread); |
2000 | } else if (rdev->mddev->pers) { | 2088 | } else if (rdev->mddev->pers) { |
2001 | mdk_rdev_t *rdev2; | 2089 | mdk_rdev_t *rdev2; |
2002 | struct list_head *tmp; | ||
2003 | /* Activating a spare .. or possibly reactivating | 2090 | /* Activating a spare .. or possibly reactivating |
2004 | * if we ever get bitmaps working here. | 2091 |
2005 | */ | 2092 | */ |
@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2010 | if (rdev->mddev->pers->hot_add_disk == NULL) | 2097 | if (rdev->mddev->pers->hot_add_disk == NULL) |
2011 | return -EINVAL; | 2098 | return -EINVAL; |
2012 | 2099 | ||
2013 | rdev_for_each(rdev2, tmp, rdev->mddev) | 2100 | list_for_each_entry(rdev2, &rdev->mddev->disks, same_set) |
2014 | if (rdev2->raid_disk == slot) | 2101 | if (rdev2->raid_disk == slot) |
2015 | return -EEXIST; | 2102 | return -EEXIST; |
2016 | 2103 | ||
@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2125 | */ | 2212 | */ |
2126 | mddev_t *mddev; | 2213 | mddev_t *mddev; |
2127 | int overlap = 0; | 2214 | int overlap = 0; |
2128 | struct list_head *tmp, *tmp2; | 2215 | struct list_head *tmp; |
2129 | 2216 | ||
2130 | mddev_unlock(my_mddev); | 2217 | mddev_unlock(my_mddev); |
2131 | for_each_mddev(mddev, tmp) { | 2218 | for_each_mddev(mddev, tmp) { |
2132 | mdk_rdev_t *rdev2; | 2219 | mdk_rdev_t *rdev2; |
2133 | 2220 | ||
2134 | mddev_lock(mddev); | 2221 | mddev_lock(mddev); |
2135 | rdev_for_each(rdev2, tmp2, mddev) | 2222 | list_for_each_entry(rdev2, &mddev->disks, same_set) |
2136 | if (test_bit(AllReserved, &rdev2->flags) || | 2223 | if (test_bit(AllReserved, &rdev2->flags) || |
2137 | (rdev->bdev == rdev2->bdev && | 2224 | (rdev->bdev == rdev2->bdev && |
2138 | rdev != rdev2 && | 2225 | rdev != rdev2 && |
@@ -2328,8 +2415,7 @@ abort_free: | |||
2328 | static void analyze_sbs(mddev_t * mddev) | 2415 | static void analyze_sbs(mddev_t * mddev) |
2329 | { | 2416 | { |
2330 | int i; | 2417 | int i; |
2331 | struct list_head *tmp; | 2418 | mdk_rdev_t *rdev, *freshest, *tmp; |
2332 | mdk_rdev_t *rdev, *freshest; | ||
2333 | char b[BDEVNAME_SIZE]; | 2419 | char b[BDEVNAME_SIZE]; |
2334 | 2420 | ||
2335 | freshest = NULL; | 2421 | freshest = NULL; |
@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
3046 | } | 3132 | } |
3047 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3133 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3048 | md_wakeup_thread(mddev->thread); | 3134 | md_wakeup_thread(mddev->thread); |
3049 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 3135 | sysfs_notify_dirent(mddev->sysfs_action); |
3050 | return len; | 3136 | return len; |
3051 | } | 3137 | } |
3052 | 3138 | ||
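sysfs_notify(&mddev->kobj, NULL, "sync_action") has to resolve the dirent by name on every event; the conversion caches the sysfs_dirent once and pokes it directly. Assembled from the hunks elsewhere in this patch, the lifecycle is:

	/* do_md_run(): cache the dirent once the group is registered */
	mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");

	/* on state changes: notify without a per-event name lookup */
	sysfs_notify_dirent(mddev->sysfs_action);

	/* do_md_stop(): drop the cached reference with the group */
	sysfs_put(mddev->sysfs_action);
	mddev->sysfs_action = NULL;

Paths that can run before the dirent exists guard on NULL first, as two later hunks show.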
@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, | |||
3404 | if (!capable(CAP_SYS_ADMIN)) | 3490 | if (!capable(CAP_SYS_ADMIN)) |
3405 | return -EACCES; | 3491 | return -EACCES; |
3406 | rv = mddev_lock(mddev); | 3492 | rv = mddev_lock(mddev); |
3493 | if (mddev->hold_active == UNTIL_IOCTL) | ||
3494 | mddev->hold_active = 0; | ||
3407 | if (!rv) { | 3495 | if (!rv) { |
3408 | rv = entry->store(mddev, page, length); | 3496 | rv = entry->store(mddev, page, length); |
3409 | mddev_unlock(mddev); | 3497 | mddev_unlock(mddev); |
@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, | |||
3414 | static void md_free(struct kobject *ko) | 3502 | static void md_free(struct kobject *ko) |
3415 | { | 3503 | { |
3416 | mddev_t *mddev = container_of(ko, mddev_t, kobj); | 3504 | mddev_t *mddev = container_of(ko, mddev_t, kobj); |
3505 | |||
3506 | if (mddev->sysfs_state) | ||
3507 | sysfs_put(mddev->sysfs_state); | ||
3508 | |||
3509 | if (mddev->gendisk) { | ||
3510 | del_gendisk(mddev->gendisk); | ||
3511 | put_disk(mddev->gendisk); | ||
3512 | } | ||
3513 | if (mddev->queue) | ||
3514 | blk_cleanup_queue(mddev->queue); | ||
3515 | |||
3417 | kfree(mddev); | 3516 | kfree(mddev); |
3418 | } | 3517 | } |
3419 | 3518 | ||
@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = { | |||
3429 | 3528 | ||
3430 | int mdp_major = 0; | 3529 | int mdp_major = 0; |
3431 | 3530 | ||
3432 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 3531 | static int md_alloc(dev_t dev, char *name) |
3433 | { | 3532 | { |
3434 | static DEFINE_MUTEX(disks_mutex); | 3533 | static DEFINE_MUTEX(disks_mutex); |
3435 | mddev_t *mddev = mddev_find(dev); | 3534 | mddev_t *mddev = mddev_find(dev); |
3436 | struct gendisk *disk; | 3535 | struct gendisk *disk; |
3437 | int partitioned = (MAJOR(dev) != MD_MAJOR); | 3536 | int partitioned; |
3438 | int shift = partitioned ? MdpMinorShift : 0; | 3537 | int shift; |
3439 | int unit = MINOR(dev) >> shift; | 3538 | int unit; |
3440 | int error; | 3539 | int error; |
3441 | 3540 | ||
3442 | if (!mddev) | 3541 | if (!mddev) |
3443 | return NULL; | 3542 | return -ENODEV; |
3543 | |||
3544 | partitioned = (MAJOR(mddev->unit) != MD_MAJOR); | ||
3545 | shift = partitioned ? MdpMinorShift : 0; | ||
3546 | unit = MINOR(mddev->unit) >> shift; | ||
3547 | |||
3548 | /* wait for any previous instance of this device | ||
3549 | * to be completely removed (mddev_delayed_delete). | ||
3550 | */ | ||
3551 | flush_scheduled_work(); | ||
3444 | 3552 | ||
3445 | mutex_lock(&disks_mutex); | 3553 | mutex_lock(&disks_mutex); |
3446 | if (mddev->gendisk) { | 3554 | if (mddev->gendisk) { |
3447 | mutex_unlock(&disks_mutex); | 3555 | mutex_unlock(&disks_mutex); |
3448 | mddev_put(mddev); | 3556 | mddev_put(mddev); |
3449 | return NULL; | 3557 | return -EEXIST; |
3558 | } | ||
3559 | |||
3560 | if (name) { | ||
3561 | /* Need to ensure that 'name' is not a duplicate. | ||
3562 | */ | ||
3563 | mddev_t *mddev2; | ||
3564 | spin_lock(&all_mddevs_lock); | ||
3565 | |||
3566 | list_for_each_entry(mddev2, &all_mddevs, all_mddevs) | ||
3567 | if (mddev2->gendisk && | ||
3568 | strcmp(mddev2->gendisk->disk_name, name) == 0) { | ||
3569 | spin_unlock(&all_mddevs_lock); | ||
3570 | return -EEXIST; | ||
3571 | } | ||
3572 | spin_unlock(&all_mddevs_lock); | ||
3573 | } | ||
3574 | |||
3575 | mddev->queue = blk_alloc_queue(GFP_KERNEL); | ||
3576 | if (!mddev->queue) { | ||
3577 | mutex_unlock(&disks_mutex); | ||
3578 | mddev_put(mddev); | ||
3579 | return -ENOMEM; | ||
3450 | } | 3580 | } |
3581 | /* Can be unlocked because the queue is new: no concurrency */ | ||
3582 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); | ||
3583 | |||
3584 | blk_queue_make_request(mddev->queue, md_fail_request); | ||
3585 | |||
3451 | disk = alloc_disk(1 << shift); | 3586 | disk = alloc_disk(1 << shift); |
3452 | if (!disk) { | 3587 | if (!disk) { |
3453 | mutex_unlock(&disks_mutex); | 3588 | mutex_unlock(&disks_mutex); |
3589 | blk_cleanup_queue(mddev->queue); | ||
3590 | mddev->queue = NULL; | ||
3454 | mddev_put(mddev); | 3591 | mddev_put(mddev); |
3455 | return NULL; | 3592 | return -ENOMEM; |
3456 | } | 3593 | } |
3457 | disk->major = MAJOR(dev); | 3594 | disk->major = MAJOR(mddev->unit); |
3458 | disk->first_minor = unit << shift; | 3595 | disk->first_minor = unit << shift; |
3459 | if (partitioned) | 3596 | if (name) |
3597 | strcpy(disk->disk_name, name); | ||
3598 | else if (partitioned) | ||
3460 | sprintf(disk->disk_name, "md_d%d", unit); | 3599 | sprintf(disk->disk_name, "md_d%d", unit); |
3461 | else | 3600 | else |
3462 | sprintf(disk->disk_name, "md%d", unit); | 3601 | sprintf(disk->disk_name, "md%d", unit); |
@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
3464 | disk->private_data = mddev; | 3603 | disk->private_data = mddev; |
3465 | disk->queue = mddev->queue; | 3604 | disk->queue = mddev->queue; |
3466 | /* Allow extended partitions. This makes the | 3605 | /* Allow extended partitions. This makes the |
3467 | * 'mdp' device redundant, but we can really | 3606 | * 'mdp' device redundant, but we can't really |
3468 | * remove it now. | 3607 | * remove it now. |
3469 | */ | 3608 | */ |
3470 | disk->flags |= GENHD_FL_EXT_DEVT; | 3609 | disk->flags |= GENHD_FL_EXT_DEVT; |
@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
3480 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 3619 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
3481 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); | 3620 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); |
3482 | } | 3621 | } |
3622 | mddev_put(mddev); | ||
3623 | return 0; | ||
3624 | } | ||
3625 | |||
3626 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | ||
3627 | { | ||
3628 | md_alloc(dev, NULL); | ||
3483 | return NULL; | 3629 | return NULL; |
3484 | } | 3630 | } |
3485 | 3631 | ||
3632 | static int add_named_array(const char *val, struct kernel_param *kp) | ||
3633 | { | ||
3634 | /* val must be "md_*" where * is not all digits. | ||
3635 | * We allocate an array with a large free minor number, and | ||
3636 | * set the name to val. val must not already be an active name. | ||
3637 | */ | ||
3638 | int len = strlen(val); | ||
3639 | char buf[DISK_NAME_LEN]; | ||
3640 | |||
3641 | while (len && val[len-1] == '\n') | ||
3642 | len--; | ||
3643 | if (len >= DISK_NAME_LEN) | ||
3644 | return -E2BIG; | ||
3645 | strlcpy(buf, val, len+1); | ||
3646 | if (strncmp(buf, "md_", 3) != 0) | ||
3647 | return -EINVAL; | ||
3648 | return md_alloc(0, buf); | ||
3649 | } | ||
3650 | |||
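add_named_array() is the handler behind the new new_array module parameter registered at the end of this patch: writing a name of the form "md_*" allocates a fresh array with that disk name and a large free minor, presumably via something like "echo md_home > /sys/module/md_mod/parameters/new_array". A condensed sketch of the validation it applies (hypothetical helper, same rules as above):

	static int md_name_valid(const char *val)
	{
		size_t len = strlen(val);

		while (len && val[len - 1] == '\n')
			len--;				/* sysfs writes end in '\n' */
		return len < DISK_NAME_LEN &&		/* must fit the disk name */
		       strncmp(val, "md_", 3) == 0;	/* and start with "md_" */
	}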
3486 | static void md_safemode_timeout(unsigned long data) | 3651 | static void md_safemode_timeout(unsigned long data) |
3487 | { | 3652 | { |
3488 | mddev_t *mddev = (mddev_t *) data; | 3653 | mddev_t *mddev = (mddev_t *) data; |
@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev) | |||
3501 | { | 3666 | { |
3502 | int err; | 3667 | int err; |
3503 | int chunk_size; | 3668 | int chunk_size; |
3504 | struct list_head *tmp; | ||
3505 | mdk_rdev_t *rdev; | 3669 | mdk_rdev_t *rdev; |
3506 | struct gendisk *disk; | 3670 | struct gendisk *disk; |
3507 | struct mdk_personality *pers; | 3671 | struct mdk_personality *pers; |
@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev) | |||
3540 | } | 3704 | } |
3541 | 3705 | ||
3542 | /* devices must have minimum size of one chunk */ | 3706 | /* devices must have minimum size of one chunk */ |
3543 | rdev_for_each(rdev, tmp, mddev) { | 3707 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
3544 | if (test_bit(Faulty, &rdev->flags)) | 3708 | if (test_bit(Faulty, &rdev->flags)) |
3545 | continue; | 3709 | continue; |
3546 | if (rdev->size < chunk_size / 1024) { | 3710 | if (rdev->size < chunk_size / 1024) { |
@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev) | |||
3565 | * the only valid external interface is through the md | 3729 | * the only valid external interface is through the md |
3566 | * device. | 3730 | * device. |
3567 | */ | 3731 | */ |
3568 | rdev_for_each(rdev, tmp, mddev) { | 3732 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
3569 | if (test_bit(Faulty, &rdev->flags)) | 3733 | if (test_bit(Faulty, &rdev->flags)) |
3570 | continue; | 3734 | continue; |
3571 | sync_blockdev(rdev->bdev); | 3735 | sync_blockdev(rdev->bdev); |
@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev) | |||
3630 | */ | 3794 | */ |
3631 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 3795 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
3632 | mdk_rdev_t *rdev2; | 3796 | mdk_rdev_t *rdev2; |
3633 | struct list_head *tmp2; | ||
3634 | int warned = 0; | 3797 | int warned = 0; |
3635 | rdev_for_each(rdev, tmp, mddev) { | 3798 | |
3636 | rdev_for_each(rdev2, tmp2, mddev) { | 3799 | list_for_each_entry(rdev, &mddev->disks, same_set) |
3800 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | ||
3637 | if (rdev < rdev2 && | 3801 | if (rdev < rdev2 && |
3638 | rdev->bdev->bd_contains == | 3802 | rdev->bdev->bd_contains == |
3639 | rdev2->bdev->bd_contains) { | 3803 | rdev2->bdev->bd_contains) { |
@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev) | |||
3647 | warned = 1; | 3811 | warned = 1; |
3648 | } | 3812 | } |
3649 | } | 3813 | } |
3650 | } | 3814 | |
3651 | if (warned) | 3815 | if (warned) |
3652 | printk(KERN_WARNING | 3816 | printk(KERN_WARNING |
3653 | "True protection against single-disk" | 3817 | "True protection against single-disk" |
@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev) | |||
3684 | printk(KERN_WARNING | 3848 | printk(KERN_WARNING |
3685 | "md: cannot register extra attributes for %s\n", | 3849 | "md: cannot register extra attributes for %s\n", |
3686 | mdname(mddev)); | 3850 | mdname(mddev)); |
3851 | mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); | ||
3687 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ | 3852 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ |
3688 | mddev->ro = 0; | 3853 | mddev->ro = 0; |
3689 | 3854 | ||
@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev) | |||
3694 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ | 3859 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
3695 | mddev->in_sync = 1; | 3860 | mddev->in_sync = 1; |
3696 | 3861 | ||
3697 | rdev_for_each(rdev, tmp, mddev) | 3862 | list_for_each_entry(rdev, &mddev->disks, same_set) |
3698 | if (rdev->raid_disk >= 0) { | 3863 | if (rdev->raid_disk >= 0) { |
3699 | char nm[20]; | 3864 | char nm[20]; |
3700 | sprintf(nm, "rd%d", rdev->raid_disk); | 3865 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev) | |||
3725 | * it will remove the drives and not do the right thing | 3890 | * it will remove the drives and not do the right thing |
3726 | */ | 3891 | */ |
3727 | if (mddev->degraded && !mddev->sync_thread) { | 3892 | if (mddev->degraded && !mddev->sync_thread) { |
3728 | struct list_head *rtmp; | ||
3729 | int spares = 0; | 3893 | int spares = 0; |
3730 | rdev_for_each(rdev, rtmp, mddev) | 3894 | list_for_each_entry(rdev, &mddev->disks, same_set) |
3731 | if (rdev->raid_disk >= 0 && | 3895 | if (rdev->raid_disk >= 0 && |
3732 | !test_bit(In_sync, &rdev->flags) && | 3896 | !test_bit(In_sync, &rdev->flags) && |
3733 | !test_bit(Faulty, &rdev->flags)) | 3897 | !test_bit(Faulty, &rdev->flags)) |
@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev) | |||
3754 | mddev->changed = 1; | 3918 | mddev->changed = 1; |
3755 | md_new_event(mddev); | 3919 | md_new_event(mddev); |
3756 | sysfs_notify_dirent(mddev->sysfs_state); | 3920 | sysfs_notify_dirent(mddev->sysfs_state); |
3757 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 3921 | if (mddev->sysfs_action) |
3922 | sysfs_notify_dirent(mddev->sysfs_action); | ||
3758 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 3923 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
3759 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 3924 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
3760 | return 0; | 3925 | return 0; |
@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
3854 | mddev->queue->merge_bvec_fn = NULL; | 4019 | mddev->queue->merge_bvec_fn = NULL; |
3855 | mddev->queue->unplug_fn = NULL; | 4020 | mddev->queue->unplug_fn = NULL; |
3856 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4021 | mddev->queue->backing_dev_info.congested_fn = NULL; |
3857 | if (mddev->pers->sync_request) | 4022 | if (mddev->pers->sync_request) { |
3858 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); | 4023 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); |
3859 | 4024 | if (mddev->sysfs_action) | |
4025 | sysfs_put(mddev->sysfs_action); | ||
4026 | mddev->sysfs_action = NULL; | ||
4027 | } | ||
3860 | module_put(mddev->pers->owner); | 4028 | module_put(mddev->pers->owner); |
3861 | mddev->pers = NULL; | 4029 | mddev->pers = NULL; |
3862 | /* tell userspace to handle 'inactive' */ | 4030 | /* tell userspace to handle 'inactive' */ |
@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
3883 | */ | 4051 | */ |
3884 | if (mode == 0) { | 4052 | if (mode == 0) { |
3885 | mdk_rdev_t *rdev; | 4053 | mdk_rdev_t *rdev; |
3886 | struct list_head *tmp; | ||
3887 | 4054 | ||
3888 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4055 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
3889 | 4056 | ||
@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
3895 | } | 4062 | } |
3896 | mddev->bitmap_offset = 0; | 4063 | mddev->bitmap_offset = 0; |
3897 | 4064 | ||
3898 | rdev_for_each(rdev, tmp, mddev) | 4065 | list_for_each_entry(rdev, &mddev->disks, same_set) |
3899 | if (rdev->raid_disk >= 0) { | 4066 | if (rdev->raid_disk >= 0) { |
3900 | char nm[20]; | 4067 | char nm[20]; |
3901 | sprintf(nm, "rd%d", rdev->raid_disk); | 4068 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
3941 | mddev->barriers_work = 0; | 4108 | mddev->barriers_work = 0; |
3942 | mddev->safemode = 0; | 4109 | mddev->safemode = 0; |
3943 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4110 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
4111 | if (mddev->hold_active == UNTIL_STOP) | ||
4112 | mddev->hold_active = 0; | ||
3944 | 4113 | ||
3945 | } else if (mddev->pers) | 4114 | } else if (mddev->pers) |
3946 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 4115 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
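The new hold_active field pins an array's lifetime past the usual "no disks, no raid_disks" reaping test in mddev_put(). From its uses in this patch: UNTIL_IOCTL marks a probe-created device, dropped at the first genuine ioctl or sysfs store; UNTIL_STOP marks a named or auto-allocated device, cleared here on full stop. The constants themselves are assumed to live in md_k.h; a plausible shape:

	/* Assumed definitions; the names are taken from their uses in md.c. */
	#define UNTIL_IOCTL	1	/* probe-created: keep until first ioctl */
	#define UNTIL_STOP	2	/* named/allocated: keep until explicit stop */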
@@ -3956,7 +4125,6 @@ out: | |||
3956 | static void autorun_array(mddev_t *mddev) | 4125 | static void autorun_array(mddev_t *mddev) |
3957 | { | 4126 | { |
3958 | mdk_rdev_t *rdev; | 4127 | mdk_rdev_t *rdev; |
3959 | struct list_head *tmp; | ||
3960 | int err; | 4128 | int err; |
3961 | 4129 | ||
3962 | if (list_empty(&mddev->disks)) | 4130 | if (list_empty(&mddev->disks)) |
@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev) | |||
3964 | 4132 | ||
3965 | printk(KERN_INFO "md: running: "); | 4133 | printk(KERN_INFO "md: running: "); |
3966 | 4134 | ||
3967 | rdev_for_each(rdev, tmp, mddev) { | 4135 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
3968 | char b[BDEVNAME_SIZE]; | 4136 | char b[BDEVNAME_SIZE]; |
3969 | printk("<%s>", bdevname(rdev->bdev,b)); | 4137 | printk("<%s>", bdevname(rdev->bdev,b)); |
3970 | } | 4138 | } |
@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev) | |||
3991 | */ | 4159 | */ |
3992 | static void autorun_devices(int part) | 4160 | static void autorun_devices(int part) |
3993 | { | 4161 | { |
3994 | struct list_head *tmp; | 4162 | mdk_rdev_t *rdev0, *rdev, *tmp; |
3995 | mdk_rdev_t *rdev0, *rdev; | ||
3996 | mddev_t *mddev; | 4163 | mddev_t *mddev; |
3997 | char b[BDEVNAME_SIZE]; | 4164 | char b[BDEVNAME_SIZE]; |
3998 | 4165 | ||
@@ -4007,7 +4174,7 @@ static void autorun_devices(int part) | |||
4007 | printk(KERN_INFO "md: considering %s ...\n", | 4174 | printk(KERN_INFO "md: considering %s ...\n", |
4008 | bdevname(rdev0->bdev,b)); | 4175 | bdevname(rdev0->bdev,b)); |
4009 | INIT_LIST_HEAD(&candidates); | 4176 | INIT_LIST_HEAD(&candidates); |
4010 | rdev_for_each_list(rdev, tmp, pending_raid_disks) | 4177 | rdev_for_each_list(rdev, tmp, &pending_raid_disks) |
4011 | if (super_90_load(rdev, rdev0, 0) >= 0) { | 4178 | if (super_90_load(rdev, rdev0, 0) >= 0) { |
4012 | printk(KERN_INFO "md: adding %s ...\n", | 4179 | printk(KERN_INFO "md: adding %s ...\n", |
4013 | bdevname(rdev->bdev,b)); | 4180 | bdevname(rdev->bdev,b)); |
@@ -4053,7 +4220,7 @@ static void autorun_devices(int part) | |||
4053 | } else { | 4220 | } else { |
4054 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); | 4221 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); |
4055 | mddev->persistent = 1; | 4222 | mddev->persistent = 1; |
4056 | rdev_for_each_list(rdev, tmp, candidates) { | 4223 | rdev_for_each_list(rdev, tmp, &candidates) { |
4057 | list_del_init(&rdev->same_set); | 4224 | list_del_init(&rdev->same_set); |
4058 | if (bind_rdev_to_array(rdev, mddev)) | 4225 | if (bind_rdev_to_array(rdev, mddev)) |
4059 | export_rdev(rdev); | 4226 | export_rdev(rdev); |
@@ -4064,7 +4231,7 @@ static void autorun_devices(int part) | |||
4064 | /* on success, candidates will be empty, on error | 4231 | /* on success, candidates will be empty, on error |
4065 | * it won't... | 4232 | * it won't... |
4066 | */ | 4233 | */ |
4067 | rdev_for_each_list(rdev, tmp, candidates) { | 4234 | rdev_for_each_list(rdev, tmp, &candidates) { |
4068 | list_del_init(&rdev->same_set); | 4235 | list_del_init(&rdev->same_set); |
4069 | export_rdev(rdev); | 4236 | export_rdev(rdev); |
4070 | } | 4237 | } |
@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
4093 | mdu_array_info_t info; | 4260 | mdu_array_info_t info; |
4094 | int nr,working,active,failed,spare; | 4261 | int nr,working,active,failed,spare; |
4095 | mdk_rdev_t *rdev; | 4262 | mdk_rdev_t *rdev; |
4096 | struct list_head *tmp; | ||
4097 | 4263 | ||
4098 | nr=working=active=failed=spare=0; | 4264 | nr=working=active=failed=spare=0; |
4099 | rdev_for_each(rdev, tmp, mddev) { | 4265 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
4100 | nr++; | 4266 | nr++; |
4101 | if (test_bit(Faulty, &rdev->flags)) | 4267 | if (test_bit(Faulty, &rdev->flags)) |
4102 | failed++; | 4268 | failed++; |
@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4614 | 4780 | ||
4615 | static int update_size(mddev_t *mddev, sector_t num_sectors) | 4781 | static int update_size(mddev_t *mddev, sector_t num_sectors) |
4616 | { | 4782 | { |
4617 | mdk_rdev_t * rdev; | 4783 | mdk_rdev_t *rdev; |
4618 | int rv; | 4784 | int rv; |
4619 | struct list_head *tmp; | ||
4620 | int fit = (num_sectors == 0); | 4785 | int fit = (num_sectors == 0); |
4621 | 4786 | ||
4622 | if (mddev->pers->resize == NULL) | 4787 | if (mddev->pers->resize == NULL) |
@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
4638 | * grow, and re-add. | 4803 | * grow, and re-add. |
4639 | */ | 4804 | */ |
4640 | return -EBUSY; | 4805 | return -EBUSY; |
4641 | rdev_for_each(rdev, tmp, mddev) { | 4806 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
4642 | sector_t avail; | 4807 | sector_t avail; |
4643 | avail = rdev->size * 2; | 4808 | avail = rdev->size * 2; |
4644 | 4809 | ||
@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
5000 | 5165 | ||
5001 | done_unlock: | 5166 | done_unlock: |
5002 | abort_unlock: | 5167 | abort_unlock: |
5168 | if (mddev->hold_active == UNTIL_IOCTL && | ||
5169 | err != -EINVAL) | ||
5170 | mddev->hold_active = 0; | ||
5003 | mddev_unlock(mddev); | 5171 | mddev_unlock(mddev); |
5004 | 5172 | ||
5005 | return err; | 5173 | return err; |
@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5016 | * Succeed if we can lock the mddev, which confirms that | 5184 | * Succeed if we can lock the mddev, which confirms that |
5017 | * it isn't being stopped right now. | 5185 | * it isn't being stopped right now. |
5018 | */ | 5186 | */ |
5019 | mddev_t *mddev = bdev->bd_disk->private_data; | 5187 | mddev_t *mddev = mddev_find(bdev->bd_dev); |
5020 | int err; | 5188 | int err; |
5021 | 5189 | ||
5190 | if (mddev->gendisk != bdev->bd_disk) { | ||
5191 | /* we are racing with mddev_put which is discarding this | ||
5192 | * bd_disk. | ||
5193 | */ | ||
5194 | mddev_put(mddev); | ||
5195 | /* Wait until bdev->bd_disk is definitely gone */ | ||
5196 | flush_scheduled_work(); | ||
5197 | /* Then retry the open from the top */ | ||
5198 | return -ERESTARTSYS; | ||
5199 | } | ||
5200 | BUG_ON(mddev != bdev->bd_disk->private_data); | ||
5201 | |||
5022 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) | 5202 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) |
5023 | goto out; | 5203 | goto out; |
5024 | 5204 | ||
5025 | err = 0; | 5205 | err = 0; |
5026 | mddev_get(mddev); | ||
5027 | atomic_inc(&mddev->openers); | 5206 | atomic_inc(&mddev->openers); |
5028 | mddev_unlock(mddev); | 5207 | mddev_unlock(mddev); |
5029 | 5208 | ||
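The -ERESTARTSYS path closes the race between an open and the last mddev_put(): bdev->bd_disk can still point at a dying gendisk while mddev_find() returns a fresh mddev for the same dev_t. Failing with -ERESTARTSYS makes the VFS retry the whole open after flush_scheduled_work() has let mddev_delayed_delete() finish. As an illustrative timeline (not code from the patch):

	/*
	 *   opener                              last holder
	 *   ------                              -----------
	 *                                       mddev_put()
	 *                                         -> schedule_work(del_work)
	 *   mddev_find(bdev->bd_dev)
	 *   mddev->gendisk != bdev->bd_disk     (old disk mid-teardown)
	 *   mddev_put(mddev)
	 *   flush_scheduled_work()              mddev_delayed_delete() runs
	 *   return -ERESTARTSYS                 kobject and gendisk fully gone
	 *   ... VFS restarts the open; the fresh lookup is consistent ...
	 */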
@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq) | |||
5187 | { | 5366 | { |
5188 | int i = 0; | 5367 | int i = 0; |
5189 | mdk_rdev_t *rdev; | 5368 | mdk_rdev_t *rdev; |
5190 | struct list_head *tmp; | ||
5191 | 5369 | ||
5192 | seq_printf(seq, "unused devices: "); | 5370 | seq_printf(seq, "unused devices: "); |
5193 | 5371 | ||
5194 | rdev_for_each_list(rdev, tmp, pending_raid_disks) { | 5372 | list_for_each_entry(rdev, &pending_raid_disks, same_set) { |
5195 | char b[BDEVNAME_SIZE]; | 5373 | char b[BDEVNAME_SIZE]; |
5196 | i++; | 5374 | i++; |
5197 | seq_printf(seq, "%s ", | 5375 | seq_printf(seq, "%s ", |
@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5350 | { | 5528 | { |
5351 | mddev_t *mddev = v; | 5529 | mddev_t *mddev = v; |
5352 | sector_t size; | 5530 | sector_t size; |
5353 | struct list_head *tmp2; | ||
5354 | mdk_rdev_t *rdev; | 5531 | mdk_rdev_t *rdev; |
5355 | struct mdstat_info *mi = seq->private; | 5532 | struct mdstat_info *mi = seq->private; |
5356 | struct bitmap *bitmap; | 5533 | struct bitmap *bitmap; |
@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5387 | } | 5564 | } |
5388 | 5565 | ||
5389 | size = 0; | 5566 | size = 0; |
5390 | rdev_for_each(rdev, tmp2, mddev) { | 5567 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
5391 | char b[BDEVNAME_SIZE]; | 5568 | char b[BDEVNAME_SIZE]; |
5392 | seq_printf(seq, " %s[%d]", | 5569 | seq_printf(seq, " %s[%d]", |
5393 | bdevname(rdev->bdev,b), rdev->desc_nr); | 5570 | bdevname(rdev->bdev,b), rdev->desc_nr); |
@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev) | |||
5694 | struct list_head *tmp; | 5871 | struct list_head *tmp; |
5695 | sector_t last_check; | 5872 | sector_t last_check; |
5696 | int skipped = 0; | 5873 | int skipped = 0; |
5697 | struct list_head *rtmp; | ||
5698 | mdk_rdev_t *rdev; | 5874 | mdk_rdev_t *rdev; |
5699 | char *desc; | 5875 | char *desc; |
5700 | 5876 | ||
@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev) | |||
5799 | /* recovery follows the physical size of devices */ | 5975 | /* recovery follows the physical size of devices */ |
5800 | max_sectors = mddev->size << 1; | 5976 | max_sectors = mddev->size << 1; |
5801 | j = MaxSector; | 5977 | j = MaxSector; |
5802 | rdev_for_each(rdev, rtmp, mddev) | 5978 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5803 | if (rdev->raid_disk >= 0 && | 5979 | if (rdev->raid_disk >= 0 && |
5804 | !test_bit(Faulty, &rdev->flags) && | 5980 | !test_bit(Faulty, &rdev->flags) && |
5805 | !test_bit(In_sync, &rdev->flags) && | 5981 | !test_bit(In_sync, &rdev->flags) && |
@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev) | |||
5949 | } else { | 6125 | } else { |
5950 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 6126 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
5951 | mddev->curr_resync = MaxSector; | 6127 | mddev->curr_resync = MaxSector; |
5952 | rdev_for_each(rdev, rtmp, mddev) | 6128 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5953 | if (rdev->raid_disk >= 0 && | 6129 | if (rdev->raid_disk >= 0 && |
5954 | !test_bit(Faulty, &rdev->flags) && | 6130 | !test_bit(Faulty, &rdev->flags) && |
5955 | !test_bit(In_sync, &rdev->flags) && | 6131 | !test_bit(In_sync, &rdev->flags) && |
@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync); | |||
5985 | static int remove_and_add_spares(mddev_t *mddev) | 6161 | static int remove_and_add_spares(mddev_t *mddev) |
5986 | { | 6162 | { |
5987 | mdk_rdev_t *rdev; | 6163 | mdk_rdev_t *rdev; |
5988 | struct list_head *rtmp; | ||
5989 | int spares = 0; | 6164 | int spares = 0; |
5990 | 6165 | ||
5991 | rdev_for_each(rdev, rtmp, mddev) | 6166 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5992 | if (rdev->raid_disk >= 0 && | 6167 | if (rdev->raid_disk >= 0 && |
5993 | !test_bit(Blocked, &rdev->flags) && | 6168 | !test_bit(Blocked, &rdev->flags) && |
5994 | (test_bit(Faulty, &rdev->flags) || | 6169 | (test_bit(Faulty, &rdev->flags) || |
@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6003 | } | 6178 | } |
6004 | } | 6179 | } |
6005 | 6180 | ||
6006 | if (mddev->degraded && ! mddev->ro) { | 6181 | if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) { |
6007 | rdev_for_each(rdev, rtmp, mddev) { | 6182 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
6008 | if (rdev->raid_disk >= 0 && | 6183 | if (rdev->raid_disk >= 0 && |
6009 | !test_bit(In_sync, &rdev->flags) && | 6184 | !test_bit(In_sync, &rdev->flags) && |
6010 | !test_bit(Blocked, &rdev->flags)) | 6185 | !test_bit(Blocked, &rdev->flags)) |
@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6056 | void md_check_recovery(mddev_t *mddev) | 6231 | void md_check_recovery(mddev_t *mddev) |
6057 | { | 6232 | { |
6058 | mdk_rdev_t *rdev; | 6233 | mdk_rdev_t *rdev; |
6059 | struct list_head *rtmp; | ||
6060 | 6234 | ||
6061 | 6235 | ||
6062 | if (mddev->bitmap) | 6236 | if (mddev->bitmap) |
@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev) | |||
6120 | if (mddev->flags) | 6294 | if (mddev->flags) |
6121 | md_update_sb(mddev, 0); | 6295 | md_update_sb(mddev, 0); |
6122 | 6296 | ||
6123 | rdev_for_each(rdev, rtmp, mddev) | 6297 | list_for_each_entry(rdev, &mddev->disks, same_set) |
6124 | if (test_and_clear_bit(StateChanged, &rdev->flags)) | 6298 | if (test_and_clear_bit(StateChanged, &rdev->flags)) |
6125 | sysfs_notify_dirent(rdev->sysfs_state); | 6299 | sysfs_notify_dirent(rdev->sysfs_state); |
6126 | 6300 | ||
@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev) | |||
6149 | * information must be scrapped | 6323 | * information must be scrapped |
6150 | */ | 6324 | */ |
6151 | if (!mddev->degraded) | 6325 | if (!mddev->degraded) |
6152 | rdev_for_each(rdev, rtmp, mddev) | 6326 | list_for_each_entry(rdev, &mddev->disks, same_set) |
6153 | rdev->saved_raid_disk = -1; | 6327 | rdev->saved_raid_disk = -1; |
6154 | 6328 | ||
6155 | mddev->recovery = 0; | 6329 | mddev->recovery = 0; |
6156 | /* flag recovery needed just to double check */ | 6330 | /* flag recovery needed just to double check */ |
6157 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6331 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6158 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 6332 | sysfs_notify_dirent(mddev->sysfs_action); |
6159 | md_new_event(mddev); | 6333 | md_new_event(mddev); |
6160 | goto unlock; | 6334 | goto unlock; |
6161 | } | 6335 | } |
@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev) | |||
6216 | mddev->recovery = 0; | 6390 | mddev->recovery = 0; |
6217 | } else | 6391 | } else |
6218 | md_wakeup_thread(mddev->sync_thread); | 6392 | md_wakeup_thread(mddev->sync_thread); |
6219 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 6393 | sysfs_notify_dirent(mddev->sysfs_action); |
6220 | md_new_event(mddev); | 6394 | md_new_event(mddev); |
6221 | } | 6395 | } |
6222 | unlock: | 6396 | unlock: |
@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev) | |||
6224 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 6398 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
6225 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | 6399 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, |
6226 | &mddev->recovery)) | 6400 | &mddev->recovery)) |
6227 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 6401 | if (mddev->sysfs_action) |
6402 | sysfs_notify_dirent(mddev->sysfs_action); | ||
6228 | } | 6403 | } |
6229 | mddev_unlock(mddev); | 6404 | mddev_unlock(mddev); |
6230 | } | 6405 | } |
@@ -6386,14 +6561,8 @@ static __exit void md_exit(void) | |||
6386 | unregister_sysctl_table(raid_table_header); | 6561 | unregister_sysctl_table(raid_table_header); |
6387 | remove_proc_entry("mdstat", NULL); | 6562 | remove_proc_entry("mdstat", NULL); |
6388 | for_each_mddev(mddev, tmp) { | 6563 | for_each_mddev(mddev, tmp) { |
6389 | struct gendisk *disk = mddev->gendisk; | ||
6390 | if (!disk) | ||
6391 | continue; | ||
6392 | export_array(mddev); | 6564 | export_array(mddev); |
6393 | del_gendisk(disk); | 6565 | mddev->hold_active = 0; |
6394 | put_disk(disk); | ||
6395 | mddev->gendisk = NULL; | ||
6396 | mddev_put(mddev); | ||
6397 | } | 6566 | } |
6398 | } | 6567 | } |
6399 | 6568 | ||
@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp) | |||
6418 | module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); | 6587 | module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); |
6419 | module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); | 6588 | module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); |
6420 | 6589 | ||
6590 | module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); | ||
6421 | 6591 | ||
6422 | EXPORT_SYMBOL(register_md_personality); | 6592 | EXPORT_SYMBOL(register_md_personality); |
6423 | EXPORT_SYMBOL(unregister_md_personality); | 6593 | EXPORT_SYMBOL(unregister_md_personality); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d4ac47d11279..f6d08f241671 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev) | |||
408 | int disk_idx; | 408 | int disk_idx; |
409 | struct multipath_info *disk; | 409 | struct multipath_info *disk; |
410 | mdk_rdev_t *rdev; | 410 | mdk_rdev_t *rdev; |
411 | struct list_head *tmp; | ||
412 | 411 | ||
413 | if (mddev->level != LEVEL_MULTIPATH) { | 412 | if (mddev->level != LEVEL_MULTIPATH) { |
414 | printk("multipath: %s: raid level not set to multipath IO (%d)\n", | 413 | printk("multipath: %s: raid level not set to multipath IO (%d)\n", |
@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev) | |||
441 | } | 440 | } |
442 | 441 | ||
443 | conf->working_disks = 0; | 442 | conf->working_disks = 0; |
444 | rdev_for_each(rdev, tmp, mddev) { | 443 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
445 | disk_idx = rdev->raid_disk; | 444 | disk_idx = rdev->raid_disk; |
446 | if (disk_idx < 0 || | 445 | if (disk_idx < 0 || |
447 | disk_idx >= mddev->raid_disks) | 446 | disk_idx >= mddev->raid_disks) |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 8ac6488ad0dc..c605ba805586 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits) | |||
53 | static int create_strip_zones (mddev_t *mddev) | 53 | static int create_strip_zones (mddev_t *mddev) |
54 | { | 54 | { |
55 | int i, c, j; | 55 | int i, c, j; |
56 | sector_t current_offset, curr_zone_offset; | 56 | sector_t current_start, curr_zone_start; |
57 | sector_t min_spacing; | 57 | sector_t min_spacing; |
58 | raid0_conf_t *conf = mddev_to_conf(mddev); | 58 | raid0_conf_t *conf = mddev_to_conf(mddev); |
59 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | 59 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; |
60 | struct list_head *tmp1, *tmp2; | ||
61 | struct strip_zone *zone; | 60 | struct strip_zone *zone; |
62 | int cnt; | 61 | int cnt; |
63 | char b[BDEVNAME_SIZE]; | 62 | char b[BDEVNAME_SIZE]; |
@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev) | |||
67 | */ | 66 | */ |
68 | conf->nr_strip_zones = 0; | 67 | conf->nr_strip_zones = 0; |
69 | 68 | ||
70 | rdev_for_each(rdev1, tmp1, mddev) { | 69 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
71 | printk("raid0: looking at %s\n", | 70 | printk(KERN_INFO "raid0: looking at %s\n", |
72 | bdevname(rdev1->bdev,b)); | 71 | bdevname(rdev1->bdev,b)); |
73 | c = 0; | 72 | c = 0; |
74 | rdev_for_each(rdev2, tmp2, mddev) { | 73 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
75 | printk("raid0: comparing %s(%llu)", | 74 | printk(KERN_INFO "raid0: comparing %s(%llu)", |
76 | bdevname(rdev1->bdev,b), | 75 | bdevname(rdev1->bdev,b), |
77 | (unsigned long long)rdev1->size); | 76 | (unsigned long long)rdev1->size); |
78 | printk(" with %s(%llu)\n", | 77 | printk(KERN_INFO " with %s(%llu)\n", |
79 | bdevname(rdev2->bdev,b), | 78 | bdevname(rdev2->bdev,b), |
80 | (unsigned long long)rdev2->size); | 79 | (unsigned long long)rdev2->size); |
81 | if (rdev2 == rdev1) { | 80 | if (rdev2 == rdev1) { |
82 | printk("raid0: END\n"); | 81 | printk(KERN_INFO "raid0: END\n"); |
83 | break; | 82 | break; |
84 | } | 83 | } |
85 | if (rdev2->size == rdev1->size) | 84 | if (rdev2->size == rdev1->size) |
@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev) | |||
88 | * Not unique, don't count it as a new | 87 | * Not unique, don't count it as a new |
89 | * group | 88 | * group |
90 | */ | 89 | */ |
91 | printk("raid0: EQUAL\n"); | 90 | printk(KERN_INFO "raid0: EQUAL\n"); |
92 | c = 1; | 91 | c = 1; |
93 | break; | 92 | break; |
94 | } | 93 | } |
95 | printk("raid0: NOT EQUAL\n"); | 94 | printk(KERN_INFO "raid0: NOT EQUAL\n"); |
96 | } | 95 | } |
97 | if (!c) { | 96 | if (!c) { |
98 | printk("raid0: ==> UNIQUE\n"); | 97 | printk(KERN_INFO "raid0: ==> UNIQUE\n"); |
99 | conf->nr_strip_zones++; | 98 | conf->nr_strip_zones++; |
100 | printk("raid0: %d zones\n", conf->nr_strip_zones); | 99 | printk(KERN_INFO "raid0: %d zones\n", |
100 | conf->nr_strip_zones); | ||
101 | } | 101 | } |
102 | } | 102 | } |
103 | printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); | 103 | printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); |
104 | 104 | ||
105 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* | 105 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* |
106 | conf->nr_strip_zones, GFP_KERNEL); | 106 | conf->nr_strip_zones, GFP_KERNEL); |
@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev) | |||
119 | cnt = 0; | 119 | cnt = 0; |
120 | smallest = NULL; | 120 | smallest = NULL; |
121 | zone->dev = conf->devlist; | 121 | zone->dev = conf->devlist; |
122 | rdev_for_each(rdev1, tmp1, mddev) { | 122 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
123 | int j = rdev1->raid_disk; | 123 | int j = rdev1->raid_disk; |
124 | 124 | ||
125 | if (j < 0 || j >= mddev->raid_disks) { | 125 | if (j < 0 || j >= mddev->raid_disks) { |
126 | printk("raid0: bad disk number %d - aborting!\n", j); | 126 | printk(KERN_ERR "raid0: bad disk number %d - " |
127 | "aborting!\n", j); | ||
127 | goto abort; | 128 | goto abort; |
128 | } | 129 | } |
129 | if (zone->dev[j]) { | 130 | if (zone->dev[j]) { |
130 | printk("raid0: multiple devices for %d - aborting!\n", | 131 | printk(KERN_ERR "raid0: multiple devices for %d - " |
131 | j); | 132 | "aborting!\n", j); |
132 | goto abort; | 133 | goto abort; |
133 | } | 134 | } |
134 | zone->dev[j] = rdev1; | 135 | zone->dev[j] = rdev1; |
@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev) | |||
149 | cnt++; | 150 | cnt++; |
150 | } | 151 | } |
151 | if (cnt != mddev->raid_disks) { | 152 | if (cnt != mddev->raid_disks) { |
152 | printk("raid0: too few disks (%d of %d) - aborting!\n", | 153 | printk(KERN_ERR "raid0: too few disks (%d of %d) - " |
153 | cnt, mddev->raid_disks); | 154 | "aborting!\n", cnt, mddev->raid_disks); |
154 | goto abort; | 155 | goto abort; |
155 | } | 156 | } |
156 | zone->nb_dev = cnt; | 157 | zone->nb_dev = cnt; |
157 | zone->size = smallest->size * cnt; | 158 | zone->sectors = smallest->size * cnt * 2; |
158 | zone->zone_offset = 0; | 159 | zone->zone_start = 0; |
159 | 160 | ||
160 | current_offset = smallest->size; | 161 | current_start = smallest->size * 2; |
161 | curr_zone_offset = zone->size; | 162 | curr_zone_start = zone->sectors; |
162 | 163 | ||
163 | /* now do the other zones */ | 164 | /* now do the other zones */ |
164 | for (i = 1; i < conf->nr_strip_zones; i++) | 165 | for (i = 1; i < conf->nr_strip_zones; i++) |
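From here on the zone geometry is kept in 512-byte sectors while rdev->size still counts 1K blocks, which is where every "* 2" above comes from. A stand-alone arithmetic sketch (the member size and disk count are invented):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long smallest_kb = 4ULL * 1024 * 1024;	/* 4 GiB member, in 1K blocks */
		int cnt = 3;						/* disks in zone 0 */

		/* zone->sectors = smallest->size * cnt * 2 */
		unsigned long long zone_sectors = smallest_kb * cnt * 2;
		/* current_start = smallest->size * 2 */
		unsigned long long current_start = smallest_kb * 2;

		printf("zone 0: %llu sectors; zone 1 starts at device sector %llu\n",
		       zone_sectors, current_start);
		return 0;
	}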
@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev) | |||
166 | zone = conf->strip_zone + i; | 167 | zone = conf->strip_zone + i; |
167 | zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; | 168 | zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; |
168 | 169 | ||
169 | printk("raid0: zone %d\n", i); | 170 | printk(KERN_INFO "raid0: zone %d\n", i); |
170 | zone->dev_offset = current_offset; | 171 | zone->dev_start = current_start; |
171 | smallest = NULL; | 172 | smallest = NULL; |
172 | c = 0; | 173 | c = 0; |
173 | 174 | ||
174 | for (j=0; j<cnt; j++) { | 175 | for (j=0; j<cnt; j++) { |
175 | char b[BDEVNAME_SIZE]; | 176 | char b[BDEVNAME_SIZE]; |
176 | rdev = conf->strip_zone[0].dev[j]; | 177 | rdev = conf->strip_zone[0].dev[j]; |
177 | printk("raid0: checking %s ...", bdevname(rdev->bdev,b)); | 178 | printk(KERN_INFO "raid0: checking %s ...", |
178 | if (rdev->size > current_offset) | 179 | bdevname(rdev->bdev, b)); |
179 | { | 180 | if (rdev->size > current_start / 2) { |
180 | printk(" contained as device %d\n", c); | 181 | printk(KERN_INFO " contained as device %d\n", |
182 | c); | ||
181 | zone->dev[c] = rdev; | 183 | zone->dev[c] = rdev; |
182 | c++; | 184 | c++; |
183 | if (!smallest || (rdev->size <smallest->size)) { | 185 | if (!smallest || (rdev->size <smallest->size)) { |
184 | smallest = rdev; | 186 | smallest = rdev; |
185 | printk(" (%llu) is smallest!.\n", | 187 | printk(KERN_INFO " (%llu) is smallest!.\n", |
186 | (unsigned long long)rdev->size); | 188 | (unsigned long long)rdev->size); |
187 | } | 189 | } |
188 | } else | 190 | } else |
189 | printk(" nope.\n"); | 191 | printk(KERN_INFO " nope.\n"); |
190 | } | 192 | } |
191 | 193 | ||
192 | zone->nb_dev = c; | 194 | zone->nb_dev = c; |
193 | zone->size = (smallest->size - current_offset) * c; | 195 | zone->sectors = (smallest->size * 2 - current_start) * c; |
194 | printk("raid0: zone->nb_dev: %d, size: %llu\n", | 196 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", |
195 | zone->nb_dev, (unsigned long long)zone->size); | 197 | zone->nb_dev, (unsigned long long)zone->sectors); |
196 | 198 | ||
197 | zone->zone_offset = curr_zone_offset; | 199 | zone->zone_start = curr_zone_start; |
198 | curr_zone_offset += zone->size; | 200 | curr_zone_start += zone->sectors; |
199 | 201 | ||
200 | current_offset = smallest->size; | 202 | current_start = smallest->size * 2; |
201 | printk("raid0: current zone offset: %llu\n", | 203 | printk(KERN_INFO "raid0: current zone start: %llu\n", |
202 | (unsigned long long)current_offset); | 204 | (unsigned long long)current_start); |
203 | } | 205 | } |
204 | 206 | ||
205 | /* Now find appropriate hash spacing. | 207 | /* Now find appropriate hash spacing. |
@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev) | |||
210 | * strip though as its size has no bearing on the efficacy of the hash | 212 | * strip though as its size has no bearing on the efficacy of the hash |
211 | * table. | 213 | * table. |
212 | */ | 214 | */ |
213 | conf->hash_spacing = curr_zone_offset; | 215 | conf->spacing = curr_zone_start; |
214 | min_spacing = curr_zone_offset; | 216 | min_spacing = curr_zone_start; |
215 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); | 217 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); |
216 | for (i=0; i < conf->nr_strip_zones-1; i++) { | 218 | for (i=0; i < conf->nr_strip_zones-1; i++) { |
217 | sector_t sz = 0; | 219 | sector_t s = 0; |
218 | for (j=i; j<conf->nr_strip_zones-1 && | 220 | for (j = i; j < conf->nr_strip_zones - 1 && |
219 | sz < min_spacing ; j++) | 221 | s < min_spacing; j++) |
220 | sz += conf->strip_zone[j].size; | 222 | s += conf->strip_zone[j].sectors; |
221 | if (sz >= min_spacing && sz < conf->hash_spacing) | 223 | if (s >= min_spacing && s < conf->spacing) |
222 | conf->hash_spacing = sz; | 224 | conf->spacing = s; |
223 | } | 225 | } |
224 | 226 | ||
225 | mddev->queue->unplug_fn = raid0_unplug; | 227 | mddev->queue->unplug_fn = raid0_unplug; |
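The renamed conf->spacing keeps its old job: find the smallest run of consecutive zone sizes that still reaches min_spacing, so the lookup hash table stays within roughly one page of strip_zone pointers. The same search in a runnable user-space rendering, with invented zone sizes:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t zone_sectors[] = { 800, 600, 400, 200 };	/* hypothetical zones */
		int nr_strip_zones = 4, i, j;
		uint64_t curr_zone_start = 2000;			/* sum of the zones */
		uint64_t spacing = curr_zone_start;
		uint64_t min_spacing = curr_zone_start / 512;		/* stands in for PAGE_SIZE/sizeof(ptr) */

		for (i = 0; i < nr_strip_zones - 1; i++) {
			uint64_t s = 0;
			for (j = i; j < nr_strip_zones - 1 && s < min_spacing; j++)
				s += zone_sectors[j];
			if (s >= min_spacing && s < spacing)
				spacing = s;
		}
		printf("spacing = %llu sectors\n", (unsigned long long)spacing);
		return 0;
	}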
@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev) | |||
227 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; | 229 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; |
228 | mddev->queue->backing_dev_info.congested_data = mddev; | 230 | mddev->queue->backing_dev_info.congested_data = mddev; |
229 | 231 | ||
230 | printk("raid0: done.\n"); | 232 | printk(KERN_INFO "raid0: done.\n"); |
231 | return 0; | 233 | return 0; |
232 | abort: | 234 | abort: |
233 | return 1; | 235 | return 1; |
@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
262 | static int raid0_run (mddev_t *mddev) | 264 | static int raid0_run (mddev_t *mddev) |
263 | { | 265 | { |
264 | unsigned cur=0, i=0, nb_zone; | 266 | unsigned cur=0, i=0, nb_zone; |
265 | s64 size; | 267 | s64 sectors; |
266 | raid0_conf_t *conf; | 268 | raid0_conf_t *conf; |
267 | mdk_rdev_t *rdev; | 269 | mdk_rdev_t *rdev; |
268 | struct list_head *tmp; | ||
269 | 270 | ||
270 | if (mddev->chunk_size == 0) { | 271 | if (mddev->chunk_size == 0) { |
271 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); | 272 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); |
@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev) | |||
291 | 292 | ||
292 | /* calculate array device size */ | 293 | /* calculate array device size */ |
293 | mddev->array_sectors = 0; | 294 | mddev->array_sectors = 0; |
294 | rdev_for_each(rdev, tmp, mddev) | 295 | list_for_each_entry(rdev, &mddev->disks, same_set) |
295 | mddev->array_sectors += rdev->size * 2; | 296 | mddev->array_sectors += rdev->size * 2; |
296 | 297 | ||
297 | printk("raid0 : md_size is %llu blocks.\n", | 298 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", |
298 | (unsigned long long)mddev->array_sectors / 2); | 299 | (unsigned long long)mddev->array_sectors); |
299 | printk("raid0 : conf->hash_spacing is %llu blocks.\n", | 300 | printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", |
300 | (unsigned long long)conf->hash_spacing); | 301 | (unsigned long long)conf->spacing); |
301 | { | 302 | { |
302 | sector_t s = mddev->array_sectors / 2; | 303 | sector_t s = mddev->array_sectors; |
303 | sector_t space = conf->hash_spacing; | 304 | sector_t space = conf->spacing; |
304 | int round; | 305 | int round; |
305 | conf->preshift = 0; | 306 | conf->sector_shift = 0; |
306 | if (sizeof(sector_t) > sizeof(u32)) { | 307 | if (sizeof(sector_t) > sizeof(u32)) { |
307 | /*shift down space and s so that sector_div will work */ | 308 | /*shift down space and s so that sector_div will work */ |
308 | while (space > (sector_t) (~(u32)0)) { | 309 | while (space > (sector_t) (~(u32)0)) { |
309 | s >>= 1; | 310 | s >>= 1; |
310 | space >>= 1; | 311 | space >>= 1; |
311 | s += 1; /* force round-up */ | 312 | s += 1; /* force round-up */ |
312 | conf->preshift++; | 313 | conf->sector_shift++; |
313 | } | 314 | } |
314 | } | 315 | } |
315 | round = sector_div(s, (u32)space) ? 1 : 0; | 316 | round = sector_div(s, (u32)space) ? 1 : 0; |
316 | nb_zone = s + round; | 317 | nb_zone = s + round; |
317 | } | 318 | } |
318 | printk("raid0 : nb_zone is %d.\n", nb_zone); | 319 | printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); |
319 | 320 | ||
320 | printk("raid0 : Allocating %Zd bytes for hash.\n", | 321 | printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", |
321 | nb_zone*sizeof(struct strip_zone*)); | 322 | nb_zone*sizeof(struct strip_zone*)); |
322 | conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); | 323 | conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); |
323 | if (!conf->hash_table) | 324 | if (!conf->hash_table) |
324 | goto out_free_conf; | 325 | goto out_free_conf; |
325 | size = conf->strip_zone[cur].size; | 326 | sectors = conf->strip_zone[cur].sectors; |
326 | 327 | ||
327 | conf->hash_table[0] = conf->strip_zone + cur; | 328 | conf->hash_table[0] = conf->strip_zone + cur; |
328 | for (i=1; i< nb_zone; i++) { | 329 | for (i=1; i< nb_zone; i++) { |
329 | while (size <= conf->hash_spacing) { | 330 | while (sectors <= conf->spacing) { |
330 | cur++; | 331 | cur++; |
331 | size += conf->strip_zone[cur].size; | 332 | sectors += conf->strip_zone[cur].sectors; |
332 | } | 333 | } |
333 | size -= conf->hash_spacing; | 334 | sectors -= conf->spacing; |
334 | conf->hash_table[i] = conf->strip_zone + cur; | 335 | conf->hash_table[i] = conf->strip_zone + cur; |
335 | } | 336 | } |
336 | if (conf->preshift) { | 337 | if (conf->sector_shift) { |
337 | conf->hash_spacing >>= conf->preshift; | 338 | conf->spacing >>= conf->sector_shift; |
338 | /* round hash_spacing up so when we divide by it, we | 339 | /* round spacing up so when we divide by it, we |
339 | * err on the side of too-low, which is safest | 340 | * err on the side of too-low, which is safest |
340 | */ | 341 | */ |
341 | conf->hash_spacing++; | 342 | conf->spacing++; |
342 | } | 343 | } |
343 | 344 | ||
344 | /* calculate the max read-ahead size. | 345 | /* calculate the max read-ahead size. |
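sector_div() only accepts a 32-bit divisor, so before the zone count is computed both the array size and conf->spacing are shifted right in lockstep until spacing fits in a u32; the renamed conf->sector_shift records how far, and spacing is bumped afterwards so later divisions err low, the safe direction for the hash lookup. The same dance in stand-alone form, with an invented geometry:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t s = 1ULL << 41;		/* array size in sectors (hypothetical) */
		uint64_t space = 0x180000000ULL;	/* spacing that overflows a u32 */
		int sector_shift = 0;

		while (space > 0xffffffffULL) {
			s >>= 1;
			space >>= 1;
			s += 1;				/* force round-up */
			sector_shift++;
		}
		/* round = sector_div(s, (u32)space) ? 1 : 0; nb_zone = s + round; */
		uint64_t nb_zone = s / (uint32_t)space + (s % (uint32_t)space ? 1 : 0);
		printf("sector_shift=%d nb_zone=%llu\n", sector_shift,
		       (unsigned long long)nb_zone);
		return 0;
	}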
@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev) | |||
387 | static int raid0_make_request (struct request_queue *q, struct bio *bio) | 388 | static int raid0_make_request (struct request_queue *q, struct bio *bio) |
388 | { | 389 | { |
389 | mddev_t *mddev = q->queuedata; | 390 | mddev_t *mddev = q->queuedata; |
390 | unsigned int sect_in_chunk, chunksize_bits, chunk_size, chunk_sects; | 391 | unsigned int sect_in_chunk, chunksect_bits, chunk_sects; |
391 | raid0_conf_t *conf = mddev_to_conf(mddev); | 392 | raid0_conf_t *conf = mddev_to_conf(mddev); |
392 | struct strip_zone *zone; | 393 | struct strip_zone *zone; |
393 | mdk_rdev_t *tmp_dev; | 394 | mdk_rdev_t *tmp_dev; |
394 | sector_t chunk; | 395 | sector_t chunk; |
395 | sector_t block, rsect; | 396 | sector_t sector, rsect; |
396 | const int rw = bio_data_dir(bio); | 397 | const int rw = bio_data_dir(bio); |
397 | int cpu; | 398 | int cpu; |
398 | 399 | ||
@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
407 | bio_sectors(bio)); | 408 | bio_sectors(bio)); |
408 | part_stat_unlock(); | 409 | part_stat_unlock(); |
409 | 410 | ||
410 | chunk_size = mddev->chunk_size >> 10; | ||
411 | chunk_sects = mddev->chunk_size >> 9; | 411 | chunk_sects = mddev->chunk_size >> 9; |
412 | chunksize_bits = ffz(~chunk_size); | 412 | chunksect_bits = ffz(~chunk_sects); |
413 | block = bio->bi_sector >> 1; | 413 | sector = bio->bi_sector; |
414 | |||
415 | 414 | ||
416 | if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { | 415 | if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { |
417 | struct bio_pair *bp; | 416 | struct bio_pair *bp; |
@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
434 | 433 | ||
435 | 434 | ||
436 | { | 435 | { |
437 | sector_t x = block >> conf->preshift; | 436 | sector_t x = sector >> conf->sector_shift; |
438 | sector_div(x, (u32)conf->hash_spacing); | 437 | sector_div(x, (u32)conf->spacing); |
439 | zone = conf->hash_table[x]; | 438 | zone = conf->hash_table[x]; |
440 | } | 439 | } |
441 | 440 | ||
442 | while (block >= (zone->zone_offset + zone->size)) | 441 | while (sector >= zone->zone_start + zone->sectors) |
443 | zone++; | 442 | zone++; |
444 | 443 | ||
445 | sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1); | 444 | sect_in_chunk = bio->bi_sector & (chunk_sects - 1); |
446 | 445 | ||
447 | 446 | ||
448 | { | 447 | { |
449 | sector_t x = (block - zone->zone_offset) >> chunksize_bits; | 448 | sector_t x = (sector - zone->zone_start) >> chunksect_bits; |
450 | 449 | ||
451 | sector_div(x, zone->nb_dev); | 450 | sector_div(x, zone->nb_dev); |
452 | chunk = x; | 451 | chunk = x; |
453 | 452 | ||
454 | x = block >> chunksize_bits; | 453 | x = sector >> chunksect_bits; |
455 | tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; | 454 | tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; |
456 | } | 455 | } |
457 | rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1) | 456 | rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; |
458 | + sect_in_chunk; | ||
459 | 457 | ||
460 | bio->bi_bdev = tmp_dev->bdev; | 458 | bio->bi_bdev = tmp_dev->bdev; |
461 | bio->bi_sector = rsect + tmp_dev->data_offset; | 459 | bio->bi_sector = rsect + tmp_dev->data_offset; |
@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
467 | 465 | ||
468 | bad_map: | 466 | bad_map: |
469 | printk("raid0_make_request bug: can't convert block across chunks" | 467 | printk("raid0_make_request bug: can't convert block across chunks" |
470 | " or bigger than %dk %llu %d\n", chunk_size, | 468 | " or bigger than %dk %llu %d\n", chunk_sects / 2, |
471 | (unsigned long long)bio->bi_sector, bio->bi_size >> 10); | 469 | (unsigned long long)bio->bi_sector, bio->bi_size >> 10); |
472 | 470 | ||
473 | bio_io_error(bio); | 471 | bio_io_error(bio); |
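With block-based addressing gone, the mapping in raid0_make_request() works purely in sectors: hash to a zone, locate the chunk and the member disk, then rebase into the device. The arithmetic, extracted into a runnable sketch (all input values are invented):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t sector = 123456;	/* bio->bi_sector */
		unsigned chunk_sects = 128;	/* 64K chunk */
		unsigned chunksect_bits = 7;	/* ffz(~128) == 7 */
		unsigned nb_dev = 4;		/* disks in the matched zone */
		uint64_t zone_start = 0, dev_start = 0;

		unsigned sect_in_chunk = sector & (chunk_sects - 1);
		uint64_t chunk = ((sector - zone_start) >> chunksect_bits) / nb_dev;
		unsigned dev = (sector >> chunksect_bits) % nb_dev;	/* sector_div remainder */
		uint64_t rsect = (chunk << chunksect_bits) + dev_start + sect_in_chunk;

		printf("array sector %llu -> member %u, device sector %llu\n",
		       (unsigned long long)sector, dev, (unsigned long long)rsect);
		return 0;
	}

Note how the old code shifted rsect left by one to convert 1K blocks back into sectors; that conversion disappears because every intermediate value is already a sector count.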
@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) | |||
492 | seq_printf(seq, "%s/", bdevname( | 490 | seq_printf(seq, "%s/", bdevname( |
493 | conf->strip_zone[j].dev[k]->bdev,b)); | 491 | conf->strip_zone[j].dev[k]->bdev,b)); |
494 | 492 | ||
495 | seq_printf(seq, "] zo=%d do=%d s=%d\n", | 493 | seq_printf(seq, "] zs=%d ds=%d s=%d\n", |
496 | conf->strip_zone[j].zone_offset, | 494 | conf->strip_zone[j].zone_start, |
497 | conf->strip_zone[j].dev_offset, | 495 | conf->strip_zone[j].dev_start, |
498 | conf->strip_zone[j].size); | 496 | conf->strip_zone[j].sectors); |
499 | } | 497 | } |
500 | #endif | 498 | #endif |
501 | seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); | 499 | seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9c788e2489b1..7b4f5f7155d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1016 | * else mark the drive as failed | 1016 | * else mark the drive as failed |
1017 | */ | 1017 | */ |
1018 | if (test_bit(In_sync, &rdev->flags) | 1018 | if (test_bit(In_sync, &rdev->flags) |
1019 | && (conf->raid_disks - mddev->degraded) == 1) | 1019 | && (conf->raid_disks - mddev->degraded) == 1) { |
1020 | /* | 1020 | /* |
1021 | * Don't fail the drive, act as though we were just a | 1021 | * Don't fail the drive, act as though we were just a |
1022 | * normal single drive | 1022 | * normal single drive. |
1023 | * However don't try a recovery from this drive as | ||
1024 | * it is very likely to fail. | ||
1023 | */ | 1025 | */ |
1026 | mddev->recovery_disabled = 1; | ||
1024 | return; | 1027 | return; |
1028 | } | ||
1025 | if (test_and_clear_bit(In_sync, &rdev->flags)) { | 1029 | if (test_and_clear_bit(In_sync, &rdev->flags)) { |
1026 | unsigned long flags; | 1030 | unsigned long flags; |
1027 | spin_lock_irqsave(&conf->device_lock, flags); | 1031 | spin_lock_irqsave(&conf->device_lock, flags); |
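Setting mddev->recovery_disabled when the last working mirror leg errors keeps md from immediately re-adding the shaky device as a spare and failing it again. A hedged sketch of how a consumer would honour the flag; the exact md.c check lives outside this diff:

	/* illustrative condition, not a verbatim md.c hunk */
	if (mddev->degraded && !mddev->ro && !mddev->recovery_disabled) {
		/* safe to hunt for spares and kick off recovery */
	}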
@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev) | |||
1919 | int i, j, disk_idx; | 1923 | int i, j, disk_idx; |
1920 | mirror_info_t *disk; | 1924 | mirror_info_t *disk; |
1921 | mdk_rdev_t *rdev; | 1925 | mdk_rdev_t *rdev; |
1922 | struct list_head *tmp; | ||
1923 | 1926 | ||
1924 | if (mddev->level != 1) { | 1927 | if (mddev->level != 1) { |
1925 | printk("raid1: %s: raid level not set to mirroring (%d)\n", | 1928 | printk("raid1: %s: raid level not set to mirroring (%d)\n", |
@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev) | |||
1964 | spin_lock_init(&conf->device_lock); | 1967 | spin_lock_init(&conf->device_lock); |
1965 | mddev->queue->queue_lock = &conf->device_lock; | 1968 | mddev->queue->queue_lock = &conf->device_lock; |
1966 | 1969 | ||
1967 | rdev_for_each(rdev, tmp, mddev) { | 1970 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
1968 | disk_idx = rdev->raid_disk; | 1971 | disk_idx = rdev->raid_disk; |
1969 | if (disk_idx >= mddev->raid_disks | 1972 | if (disk_idx >= mddev->raid_disks |
1970 | || disk_idx < 0) | 1973 | || disk_idx < 0) |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 970a96ef9b18..6736d6dff981 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev) | |||
2025 | int i, disk_idx; | 2025 | int i, disk_idx; |
2026 | mirror_info_t *disk; | 2026 | mirror_info_t *disk; |
2027 | mdk_rdev_t *rdev; | 2027 | mdk_rdev_t *rdev; |
2028 | struct list_head *tmp; | ||
2029 | int nc, fc, fo; | 2028 | int nc, fc, fo; |
2030 | sector_t stride, size; | 2029 | sector_t stride, size; |
2031 | 2030 | ||
@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev) | |||
2108 | spin_lock_init(&conf->device_lock); | 2107 | spin_lock_init(&conf->device_lock); |
2109 | mddev->queue->queue_lock = &conf->device_lock; | 2108 | mddev->queue->queue_lock = &conf->device_lock; |
2110 | 2109 | ||
2111 | rdev_for_each(rdev, tmp, mddev) { | 2110 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
2112 | disk_idx = rdev->raid_disk; | 2111 | disk_idx = rdev->raid_disk; |
2113 | if (disk_idx >= mddev->raid_disks | 2112 | if (disk_idx >= mddev->raid_disks |
2114 | || disk_idx < 0) | 2113 | || disk_idx < 0) |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a36a7435edf5..a5ba080d303b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev) | |||
3998 | int raid_disk, memory; | 3998 | int raid_disk, memory; |
3999 | mdk_rdev_t *rdev; | 3999 | mdk_rdev_t *rdev; |
4000 | struct disk_info *disk; | 4000 | struct disk_info *disk; |
4001 | struct list_head *tmp; | ||
4002 | int working_disks = 0; | 4001 | int working_disks = 0; |
4003 | 4002 | ||
4004 | if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { | 4003 | if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { |
@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev) | |||
4108 | 4107 | ||
4109 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); | 4108 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); |
4110 | 4109 | ||
4111 | rdev_for_each(rdev, tmp, mddev) { | 4110 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
4112 | raid_disk = rdev->raid_disk; | 4111 | raid_disk = rdev->raid_disk; |
4113 | if (raid_disk >= conf->raid_disks | 4112 | if (raid_disk >= conf->raid_disks |
4114 | || raid_disk < 0) | 4113 | || raid_disk < 0) |
@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4533 | { | 4532 | { |
4534 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4533 | raid5_conf_t *conf = mddev_to_conf(mddev); |
4535 | mdk_rdev_t *rdev; | 4534 | mdk_rdev_t *rdev; |
4536 | struct list_head *rtmp; | ||
4537 | int spares = 0; | 4535 | int spares = 0; |
4538 | int added_devices = 0; | 4536 | int added_devices = 0; |
4539 | unsigned long flags; | 4537 | unsigned long flags; |
@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4541 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 4539 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
4542 | return -EBUSY; | 4540 | return -EBUSY; |
4543 | 4541 | ||
4544 | rdev_for_each(rdev, rtmp, mddev) | 4542 | list_for_each_entry(rdev, &mddev->disks, same_set) |
4545 | if (rdev->raid_disk < 0 && | 4543 | if (rdev->raid_disk < 0 && |
4546 | !test_bit(Faulty, &rdev->flags)) | 4544 | !test_bit(Faulty, &rdev->flags)) |
4547 | spares++; | 4545 | spares++; |
@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4563 | /* Add some new drives, as many as will fit. | 4561 | /* Add some new drives, as many as will fit. |
4564 | * We know there are enough to make the newly sized array work. | 4562 | * We know there are enough to make the newly sized array work. |
4565 | */ | 4563 | */ |
4566 | rdev_for_each(rdev, rtmp, mddev) | 4564 | list_for_each_entry(rdev, &mddev->disks, same_set) |
4567 | if (rdev->raid_disk < 0 && | 4565 | if (rdev->raid_disk < 0 && |
4568 | !test_bit(Faulty, &rdev->flags)) { | 4566 | !test_bit(Faulty, &rdev->flags)) { |
4569 | if (raid5_add_disk(mddev, rdev) == 0) { | 4567 | if (raid5_add_disk(mddev, rdev) == 0) { |