aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-exception-store.c13
-rw-r--r--drivers/md/dm-exception-store.h4
-rw-r--r--drivers/md/dm-log-userspace-base.c39
-rw-r--r--drivers/md/dm-log-userspace-transfer.c8
-rw-r--r--drivers/md/dm-log-userspace-transfer.h2
-rw-r--r--drivers/md/dm-raid1.c8
-rw-r--r--drivers/md/dm-snap-persistent.c88
-rw-r--r--drivers/md/dm-snap.c23
-rw-r--r--drivers/md/dm-stripe.c13
-rw-r--r--drivers/md/dm-table.c51
-rw-r--r--drivers/md/dm.c15
-rw-r--r--drivers/md/md.c33
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
14 files changed, 240 insertions, 101 deletions
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 3710ff88fc10..556acff3952f 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -171,6 +171,14 @@ static int set_chunk_size(struct dm_exception_store *store,
171 */ 171 */
172 chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); 172 chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
173 173
174 return dm_exception_store_set_chunk_size(store, chunk_size_ulong,
175 error);
176}
177
178int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
179 unsigned long chunk_size_ulong,
180 char **error)
181{
174 /* Check chunk_size is a power of 2 */ 182 /* Check chunk_size is a power of 2 */
175 if (!is_power_of_2(chunk_size_ulong)) { 183 if (!is_power_of_2(chunk_size_ulong)) {
176 *error = "Chunk size is not a power of 2"; 184 *error = "Chunk size is not a power of 2";
@@ -183,6 +191,11 @@ static int set_chunk_size(struct dm_exception_store *store,
183 return -EINVAL; 191 return -EINVAL;
184 } 192 }
185 193
194 if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) {
195 *error = "Chunk size is too high";
196 return -EINVAL;
197 }
198
186 store->chunk_size = chunk_size_ulong; 199 store->chunk_size = chunk_size_ulong;
187 store->chunk_mask = chunk_size_ulong - 1; 200 store->chunk_mask = chunk_size_ulong - 1;
188 store->chunk_shift = ffs(chunk_size_ulong) - 1; 201 store->chunk_shift = ffs(chunk_size_ulong) - 1;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 2442c8c07898..812c71872ba0 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -168,6 +168,10 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
168int dm_exception_store_type_register(struct dm_exception_store_type *type); 168int dm_exception_store_type_register(struct dm_exception_store_type *type);
169int dm_exception_store_type_unregister(struct dm_exception_store_type *type); 169int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
170 170
171int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
172 unsigned long chunk_size_ulong,
173 char **error);
174
171int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, 175int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
172 unsigned *args_used, 176 unsigned *args_used,
173 struct dm_exception_store **store); 177 struct dm_exception_store **store);
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index e69b96560997..6e186b1a062d 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -21,6 +21,7 @@ struct log_c {
21 struct dm_target *ti; 21 struct dm_target *ti;
22 uint32_t region_size; 22 uint32_t region_size;
23 region_t region_count; 23 region_t region_count;
24 uint64_t luid;
24 char uuid[DM_UUID_LEN]; 25 char uuid[DM_UUID_LEN];
25 26
26 char *usr_argv_str; 27 char *usr_argv_str;
@@ -63,7 +64,7 @@ static int userspace_do_request(struct log_c *lc, const char *uuid,
63 * restored. 64 * restored.
64 */ 65 */
65retry: 66retry:
66 r = dm_consult_userspace(uuid, request_type, data, 67 r = dm_consult_userspace(uuid, lc->luid, request_type, data,
67 data_size, rdata, rdata_size); 68 data_size, rdata, rdata_size);
68 69
69 if (r != -ESRCH) 70 if (r != -ESRCH)
@@ -74,14 +75,15 @@ retry:
74 set_current_state(TASK_INTERRUPTIBLE); 75 set_current_state(TASK_INTERRUPTIBLE);
75 schedule_timeout(2*HZ); 76 schedule_timeout(2*HZ);
76 DMWARN("Attempting to contact userspace log server..."); 77 DMWARN("Attempting to contact userspace log server...");
77 r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, 78 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
79 lc->usr_argv_str,
78 strlen(lc->usr_argv_str) + 1, 80 strlen(lc->usr_argv_str) + 1,
79 NULL, NULL); 81 NULL, NULL);
80 if (!r) 82 if (!r)
81 break; 83 break;
82 } 84 }
83 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); 85 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
84 r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, 86 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
85 0, NULL, NULL); 87 0, NULL, NULL);
86 if (!r) 88 if (!r)
87 goto retry; 89 goto retry;
@@ -111,10 +113,9 @@ static int build_constructor_string(struct dm_target *ti,
111 return -ENOMEM; 113 return -ENOMEM;
112 } 114 }
113 115
114 for (i = 0, str_size = 0; i < argc; i++) 116 str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
115 str_size += sprintf(str + str_size, "%s ", argv[i]); 117 for (i = 0; i < argc; i++)
116 str_size += sprintf(str + str_size, "%llu", 118 str_size += sprintf(str + str_size, " %s", argv[i]);
117 (unsigned long long)ti->len);
118 119
119 *ctr_str = str; 120 *ctr_str = str;
120 return str_size; 121 return str_size;
@@ -154,6 +155,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
154 return -ENOMEM; 155 return -ENOMEM;
155 } 156 }
156 157
158 /* The ptr value is sufficient for local unique id */
159 lc->luid = (uint64_t)lc;
160
157 lc->ti = ti; 161 lc->ti = ti;
158 162
159 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { 163 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
@@ -173,7 +177,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
173 } 177 }
174 178
175 /* Send table string */ 179 /* Send table string */
176 r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, 180 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
177 ctr_str, str_size, NULL, NULL); 181 ctr_str, str_size, NULL, NULL);
178 182
179 if (r == -ESRCH) { 183 if (r == -ESRCH) {
@@ -183,7 +187,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
183 187
184 /* Since the region size does not change, get it now */ 188 /* Since the region size does not change, get it now */
185 rdata_size = sizeof(rdata); 189 rdata_size = sizeof(rdata);
186 r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, 190 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
187 NULL, 0, (char *)&rdata, &rdata_size); 191 NULL, 0, (char *)&rdata, &rdata_size);
188 192
189 if (r) { 193 if (r) {
@@ -212,7 +216,7 @@ static void userspace_dtr(struct dm_dirty_log *log)
212 int r; 216 int r;
213 struct log_c *lc = log->context; 217 struct log_c *lc = log->context;
214 218
215 r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, 219 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
216 NULL, 0, 220 NULL, 0,
217 NULL, NULL); 221 NULL, NULL);
218 222
@@ -227,7 +231,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
227 int r; 231 int r;
228 struct log_c *lc = log->context; 232 struct log_c *lc = log->context;
229 233
230 r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, 234 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
231 NULL, 0, 235 NULL, 0,
232 NULL, NULL); 236 NULL, NULL);
233 237
@@ -239,7 +243,7 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
239 int r; 243 int r;
240 struct log_c *lc = log->context; 244 struct log_c *lc = log->context;
241 245
242 r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, 246 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
243 NULL, 0, 247 NULL, 0,
244 NULL, NULL); 248 NULL, NULL);
245 249
@@ -252,7 +256,7 @@ static int userspace_resume(struct dm_dirty_log *log)
252 struct log_c *lc = log->context; 256 struct log_c *lc = log->context;
253 257
254 lc->in_sync_hint = 0; 258 lc->in_sync_hint = 0;
255 r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, 259 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
256 NULL, 0, 260 NULL, 0,
257 NULL, NULL); 261 NULL, NULL);
258 262
@@ -561,6 +565,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
561 char *result, unsigned maxlen) 565 char *result, unsigned maxlen)
562{ 566{
563 int r = 0; 567 int r = 0;
568 char *table_args;
564 size_t sz = (size_t)maxlen; 569 size_t sz = (size_t)maxlen;
565 struct log_c *lc = log->context; 570 struct log_c *lc = log->context;
566 571
@@ -577,8 +582,12 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
577 break; 582 break;
578 case STATUSTYPE_TABLE: 583 case STATUSTYPE_TABLE:
579 sz = 0; 584 sz = 0;
580 DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1, 585 table_args = strstr(lc->usr_argv_str, " ");
581 lc->uuid, lc->usr_argv_str); 586 BUG_ON(!table_args); /* There will always be a ' ' */
587 table_args++;
588
589 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
590 lc->uuid, table_args);
582 break; 591 break;
583 } 592 }
584 return (r) ? 0 : (int)sz; 593 return (r) ? 0 : (int)sz;
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 0ca1ee768a1f..ba0edad2d048 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -108,7 +108,7 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
108 *(pkg->data_size) = 0; 108 *(pkg->data_size) = 0;
109 } else if (tfr->data_size > *(pkg->data_size)) { 109 } else if (tfr->data_size > *(pkg->data_size)) {
110 DMERR("Insufficient space to receive package [%u] " 110 DMERR("Insufficient space to receive package [%u] "
111 "(%u vs %lu)", tfr->request_type, 111 "(%u vs %zu)", tfr->request_type,
112 tfr->data_size, *(pkg->data_size)); 112 tfr->data_size, *(pkg->data_size));
113 113
114 *(pkg->data_size) = 0; 114 *(pkg->data_size) = 0;
@@ -147,7 +147,8 @@ static void cn_ulog_callback(void *data)
147 147
148/** 148/**
149 * dm_consult_userspace 149 * dm_consult_userspace
150 * @uuid: log's uuid (must be DM_UUID_LEN in size) 150 * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
151 * @luid: log's local unique identifier
151 * @request_type: found in include/linux/dm-log-userspace.h 152 * @request_type: found in include/linux/dm-log-userspace.h
152 * @data: data to tx to the server 153 * @data: data to tx to the server
153 * @data_size: size of data in bytes 154 * @data_size: size of data in bytes
@@ -163,7 +164,7 @@ static void cn_ulog_callback(void *data)
163 * 164 *
164 * Returns: 0 on success, -EXXX on failure 165 * Returns: 0 on success, -EXXX on failure
165 **/ 166 **/
166int dm_consult_userspace(const char *uuid, int request_type, 167int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
167 char *data, size_t data_size, 168 char *data, size_t data_size,
168 char *rdata, size_t *rdata_size) 169 char *rdata, size_t *rdata_size)
169{ 170{
@@ -190,6 +191,7 @@ resend:
190 191
191 memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); 192 memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
192 memcpy(tfr->uuid, uuid, DM_UUID_LEN); 193 memcpy(tfr->uuid, uuid, DM_UUID_LEN);
194 tfr->luid = luid;
193 tfr->seq = dm_ulog_seq++; 195 tfr->seq = dm_ulog_seq++;
194 196
195 /* 197 /*
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h
index c26d8e4e2710..04ee874f9153 100644
--- a/drivers/md/dm-log-userspace-transfer.h
+++ b/drivers/md/dm-log-userspace-transfer.h
@@ -11,7 +11,7 @@
11 11
12int dm_ulog_tfr_init(void); 12int dm_ulog_tfr_init(void);
13void dm_ulog_tfr_exit(void); 13void dm_ulog_tfr_exit(void);
14int dm_consult_userspace(const char *uuid, int request_type, 14int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
15 char *data, size_t data_size, 15 char *data, size_t data_size,
16 char *rdata, size_t *rdata_size); 16 char *rdata, size_t *rdata_size);
17 17
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9726577cde49..33f179e66bf5 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -648,7 +648,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
648 */ 648 */
649 dm_rh_inc_pending(ms->rh, &sync); 649 dm_rh_inc_pending(ms->rh, &sync);
650 dm_rh_inc_pending(ms->rh, &nosync); 650 dm_rh_inc_pending(ms->rh, &nosync);
651 ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0; 651
652 /*
653 * If the flush fails on a previous call and succeeds here,
654 * we must not reset the log_failure variable. We need
655 * userspace interaction to do that.
656 */
657 ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
652 658
653 /* 659 /*
654 * Dispatch io. 660 * Dispatch io.
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 6e3fe4f14934..d5b2e08750d5 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -106,6 +106,13 @@ struct pstore {
106 void *zero_area; 106 void *zero_area;
107 107
108 /* 108 /*
109 * An area used for header. The header can be written
110 * concurrently with metadata (when invalidating the snapshot),
111 * so it needs a separate buffer.
112 */
113 void *header_area;
114
115 /*
109 * Used to keep track of which metadata area the data in 116 * Used to keep track of which metadata area the data in
110 * 'chunk' refers to. 117 * 'chunk' refers to.
111 */ 118 */
@@ -148,16 +155,27 @@ static int alloc_area(struct pstore *ps)
148 */ 155 */
149 ps->area = vmalloc(len); 156 ps->area = vmalloc(len);
150 if (!ps->area) 157 if (!ps->area)
151 return r; 158 goto err_area;
152 159
153 ps->zero_area = vmalloc(len); 160 ps->zero_area = vmalloc(len);
154 if (!ps->zero_area) { 161 if (!ps->zero_area)
155 vfree(ps->area); 162 goto err_zero_area;
156 return r;
157 }
158 memset(ps->zero_area, 0, len); 163 memset(ps->zero_area, 0, len);
159 164
165 ps->header_area = vmalloc(len);
166 if (!ps->header_area)
167 goto err_header_area;
168
160 return 0; 169 return 0;
170
171err_header_area:
172 vfree(ps->zero_area);
173
174err_zero_area:
175 vfree(ps->area);
176
177err_area:
178 return r;
161} 179}
162 180
163static void free_area(struct pstore *ps) 181static void free_area(struct pstore *ps)
@@ -169,6 +187,10 @@ static void free_area(struct pstore *ps)
169 if (ps->zero_area) 187 if (ps->zero_area)
170 vfree(ps->zero_area); 188 vfree(ps->zero_area);
171 ps->zero_area = NULL; 189 ps->zero_area = NULL;
190
191 if (ps->header_area)
192 vfree(ps->header_area);
193 ps->header_area = NULL;
172} 194}
173 195
174struct mdata_req { 196struct mdata_req {
@@ -188,7 +210,8 @@ static void do_metadata(struct work_struct *work)
188/* 210/*
189 * Read or write a chunk aligned and sized block of data from a device. 211 * Read or write a chunk aligned and sized block of data from a device.
190 */ 212 */
191static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) 213static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
214 int metadata)
192{ 215{
193 struct dm_io_region where = { 216 struct dm_io_region where = {
194 .bdev = ps->store->cow->bdev, 217 .bdev = ps->store->cow->bdev,
@@ -198,7 +221,7 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
198 struct dm_io_request io_req = { 221 struct dm_io_request io_req = {
199 .bi_rw = rw, 222 .bi_rw = rw,
200 .mem.type = DM_IO_VMA, 223 .mem.type = DM_IO_VMA,
201 .mem.ptr.vma = ps->area, 224 .mem.ptr.vma = area,
202 .client = ps->io_client, 225 .client = ps->io_client,
203 .notify.fn = NULL, 226 .notify.fn = NULL,
204 }; 227 };
@@ -240,7 +263,7 @@ static int area_io(struct pstore *ps, int rw)
240 263
241 chunk = area_location(ps, ps->current_area); 264 chunk = area_location(ps, ps->current_area);
242 265
243 r = chunk_io(ps, chunk, rw, 0); 266 r = chunk_io(ps, ps->area, chunk, rw, 0);
244 if (r) 267 if (r)
245 return r; 268 return r;
246 269
@@ -254,20 +277,7 @@ static void zero_memory_area(struct pstore *ps)
254 277
255static int zero_disk_area(struct pstore *ps, chunk_t area) 278static int zero_disk_area(struct pstore *ps, chunk_t area)
256{ 279{
257 struct dm_io_region where = { 280 return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
258 .bdev = ps->store->cow->bdev,
259 .sector = ps->store->chunk_size * area_location(ps, area),
260 .count = ps->store->chunk_size,
261 };
262 struct dm_io_request io_req = {
263 .bi_rw = WRITE,
264 .mem.type = DM_IO_VMA,
265 .mem.ptr.vma = ps->zero_area,
266 .client = ps->io_client,
267 .notify.fn = NULL,
268 };
269
270 return dm_io(&io_req, 1, &where, NULL);
271} 281}
272 282
273static int read_header(struct pstore *ps, int *new_snapshot) 283static int read_header(struct pstore *ps, int *new_snapshot)
@@ -276,6 +286,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
276 struct disk_header *dh; 286 struct disk_header *dh;
277 chunk_t chunk_size; 287 chunk_t chunk_size;
278 int chunk_size_supplied = 1; 288 int chunk_size_supplied = 1;
289 char *chunk_err;
279 290
280 /* 291 /*
281 * Use default chunk size (or hardsect_size, if larger) if none supplied 292 * Use default chunk size (or hardsect_size, if larger) if none supplied
@@ -297,11 +308,11 @@ static int read_header(struct pstore *ps, int *new_snapshot)
297 if (r) 308 if (r)
298 return r; 309 return r;
299 310
300 r = chunk_io(ps, 0, READ, 1); 311 r = chunk_io(ps, ps->header_area, 0, READ, 1);
301 if (r) 312 if (r)
302 goto bad; 313 goto bad;
303 314
304 dh = (struct disk_header *) ps->area; 315 dh = ps->header_area;
305 316
306 if (le32_to_cpu(dh->magic) == 0) { 317 if (le32_to_cpu(dh->magic) == 0) {
307 *new_snapshot = 1; 318 *new_snapshot = 1;
@@ -319,20 +330,25 @@ static int read_header(struct pstore *ps, int *new_snapshot)
319 ps->version = le32_to_cpu(dh->version); 330 ps->version = le32_to_cpu(dh->version);
320 chunk_size = le32_to_cpu(dh->chunk_size); 331 chunk_size = le32_to_cpu(dh->chunk_size);
321 332
322 if (!chunk_size_supplied || ps->store->chunk_size == chunk_size) 333 if (ps->store->chunk_size == chunk_size)
323 return 0; 334 return 0;
324 335
325 DMWARN("chunk size %llu in device metadata overrides " 336 if (chunk_size_supplied)
326 "table chunk size of %llu.", 337 DMWARN("chunk size %llu in device metadata overrides "
327 (unsigned long long)chunk_size, 338 "table chunk size of %llu.",
328 (unsigned long long)ps->store->chunk_size); 339 (unsigned long long)chunk_size,
340 (unsigned long long)ps->store->chunk_size);
329 341
330 /* We had a bogus chunk_size. Fix stuff up. */ 342 /* We had a bogus chunk_size. Fix stuff up. */
331 free_area(ps); 343 free_area(ps);
332 344
333 ps->store->chunk_size = chunk_size; 345 r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
334 ps->store->chunk_mask = chunk_size - 1; 346 &chunk_err);
335 ps->store->chunk_shift = ffs(chunk_size) - 1; 347 if (r) {
348 DMERR("invalid on-disk chunk size %llu: %s.",
349 (unsigned long long)chunk_size, chunk_err);
350 return r;
351 }
336 352
337 r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), 353 r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
338 ps->io_client); 354 ps->io_client);
@@ -351,15 +367,15 @@ static int write_header(struct pstore *ps)
351{ 367{
352 struct disk_header *dh; 368 struct disk_header *dh;
353 369
354 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); 370 memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
355 371
356 dh = (struct disk_header *) ps->area; 372 dh = ps->header_area;
357 dh->magic = cpu_to_le32(SNAP_MAGIC); 373 dh->magic = cpu_to_le32(SNAP_MAGIC);
358 dh->valid = cpu_to_le32(ps->valid); 374 dh->valid = cpu_to_le32(ps->valid);
359 dh->version = cpu_to_le32(ps->version); 375 dh->version = cpu_to_le32(ps->version);
360 dh->chunk_size = cpu_to_le32(ps->store->chunk_size); 376 dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
361 377
362 return chunk_io(ps, 0, WRITE, 1); 378 return chunk_io(ps, ps->header_area, 0, WRITE, 1);
363} 379}
364 380
365/* 381/*
@@ -679,6 +695,8 @@ static int persistent_ctr(struct dm_exception_store *store,
679 ps->valid = 1; 695 ps->valid = 1;
680 ps->version = SNAPSHOT_DISK_VERSION; 696 ps->version = SNAPSHOT_DISK_VERSION;
681 ps->area = NULL; 697 ps->area = NULL;
698 ps->zero_area = NULL;
699 ps->header_area = NULL;
682 ps->next_free = 2; /* skipping the header and first area */ 700 ps->next_free = 2; /* skipping the header and first area */
683 ps->current_committed = 0; 701 ps->current_committed = 0;
684 702
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index d573165cd2b7..57f1bf7f3b7a 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1176,6 +1176,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
1176 return 0; 1176 return 0;
1177} 1177}
1178 1178
1179static int snapshot_iterate_devices(struct dm_target *ti,
1180 iterate_devices_callout_fn fn, void *data)
1181{
1182 struct dm_snapshot *snap = ti->private;
1183
1184 return fn(ti, snap->origin, 0, ti->len, data);
1185}
1186
1187
1179/*----------------------------------------------------------------- 1188/*-----------------------------------------------------------------
1180 * Origin methods 1189 * Origin methods
1181 *---------------------------------------------------------------*/ 1190 *---------------------------------------------------------------*/
@@ -1410,20 +1419,29 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
1410 return 0; 1419 return 0;
1411} 1420}
1412 1421
1422static int origin_iterate_devices(struct dm_target *ti,
1423 iterate_devices_callout_fn fn, void *data)
1424{
1425 struct dm_dev *dev = ti->private;
1426
1427 return fn(ti, dev, 0, ti->len, data);
1428}
1429
1413static struct target_type origin_target = { 1430static struct target_type origin_target = {
1414 .name = "snapshot-origin", 1431 .name = "snapshot-origin",
1415 .version = {1, 6, 0}, 1432 .version = {1, 7, 0},
1416 .module = THIS_MODULE, 1433 .module = THIS_MODULE,
1417 .ctr = origin_ctr, 1434 .ctr = origin_ctr,
1418 .dtr = origin_dtr, 1435 .dtr = origin_dtr,
1419 .map = origin_map, 1436 .map = origin_map,
1420 .resume = origin_resume, 1437 .resume = origin_resume,
1421 .status = origin_status, 1438 .status = origin_status,
1439 .iterate_devices = origin_iterate_devices,
1422}; 1440};
1423 1441
1424static struct target_type snapshot_target = { 1442static struct target_type snapshot_target = {
1425 .name = "snapshot", 1443 .name = "snapshot",
1426 .version = {1, 6, 0}, 1444 .version = {1, 7, 0},
1427 .module = THIS_MODULE, 1445 .module = THIS_MODULE,
1428 .ctr = snapshot_ctr, 1446 .ctr = snapshot_ctr,
1429 .dtr = snapshot_dtr, 1447 .dtr = snapshot_dtr,
@@ -1431,6 +1449,7 @@ static struct target_type snapshot_target = {
1431 .end_io = snapshot_end_io, 1449 .end_io = snapshot_end_io,
1432 .resume = snapshot_resume, 1450 .resume = snapshot_resume,
1433 .status = snapshot_status, 1451 .status = snapshot_status,
1452 .iterate_devices = snapshot_iterate_devices,
1434}; 1453};
1435 1454
1436static int __init dm_snapshot_init(void) 1455static int __init dm_snapshot_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e0e5937e42a..3e563d251733 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -329,9 +329,19 @@ static int stripe_iterate_devices(struct dm_target *ti,
329 return ret; 329 return ret;
330} 330}
331 331
332static void stripe_io_hints(struct dm_target *ti,
333 struct queue_limits *limits)
334{
335 struct stripe_c *sc = ti->private;
336 unsigned chunk_size = (sc->chunk_mask + 1) << 9;
337
338 blk_limits_io_min(limits, chunk_size);
339 limits->io_opt = chunk_size * sc->stripes;
340}
341
332static struct target_type stripe_target = { 342static struct target_type stripe_target = {
333 .name = "striped", 343 .name = "striped",
334 .version = {1, 2, 0}, 344 .version = {1, 3, 0},
335 .module = THIS_MODULE, 345 .module = THIS_MODULE,
336 .ctr = stripe_ctr, 346 .ctr = stripe_ctr,
337 .dtr = stripe_dtr, 347 .dtr = stripe_dtr,
@@ -339,6 +349,7 @@ static struct target_type stripe_target = {
339 .end_io = stripe_end_io, 349 .end_io = stripe_end_io,
340 .status = stripe_status, 350 .status = stripe_status,
341 .iterate_devices = stripe_iterate_devices, 351 .iterate_devices = stripe_iterate_devices,
352 .io_hints = stripe_io_hints,
342}; 353};
343 354
344int __init dm_stripe_init(void) 355int __init dm_stripe_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d952b3441913..1a6cb3c7822e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -343,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
343} 343}
344 344
345/* 345/*
346 * If possible, this checks an area of a destination device is valid. 346 * If possible, this checks an area of a destination device is invalid.
347 */ 347 */
348static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, 348static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
349 sector_t start, sector_t len, void *data) 349 sector_t start, sector_t len, void *data)
350{ 350{
351 struct queue_limits *limits = data; 351 struct queue_limits *limits = data;
352 struct block_device *bdev = dev->bdev; 352 struct block_device *bdev = dev->bdev;
@@ -357,36 +357,40 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
357 char b[BDEVNAME_SIZE]; 357 char b[BDEVNAME_SIZE];
358 358
359 if (!dev_size) 359 if (!dev_size)
360 return 1; 360 return 0;
361 361
362 if ((start >= dev_size) || (start + len > dev_size)) { 362 if ((start >= dev_size) || (start + len > dev_size)) {
363 DMWARN("%s: %s too small for target", 363 DMWARN("%s: %s too small for target: "
364 dm_device_name(ti->table->md), bdevname(bdev, b)); 364 "start=%llu, len=%llu, dev_size=%llu",
365 return 0; 365 dm_device_name(ti->table->md), bdevname(bdev, b),
366 (unsigned long long)start,
367 (unsigned long long)len,
368 (unsigned long long)dev_size);
369 return 1;
366 } 370 }
367 371
368 if (logical_block_size_sectors <= 1) 372 if (logical_block_size_sectors <= 1)
369 return 1; 373 return 0;
370 374
371 if (start & (logical_block_size_sectors - 1)) { 375 if (start & (logical_block_size_sectors - 1)) {
372 DMWARN("%s: start=%llu not aligned to h/w " 376 DMWARN("%s: start=%llu not aligned to h/w "
373 "logical block size %hu of %s", 377 "logical block size %u of %s",
374 dm_device_name(ti->table->md), 378 dm_device_name(ti->table->md),
375 (unsigned long long)start, 379 (unsigned long long)start,
376 limits->logical_block_size, bdevname(bdev, b)); 380 limits->logical_block_size, bdevname(bdev, b));
377 return 0; 381 return 1;
378 } 382 }
379 383
380 if (len & (logical_block_size_sectors - 1)) { 384 if (len & (logical_block_size_sectors - 1)) {
381 DMWARN("%s: len=%llu not aligned to h/w " 385 DMWARN("%s: len=%llu not aligned to h/w "
382 "logical block size %hu of %s", 386 "logical block size %u of %s",
383 dm_device_name(ti->table->md), 387 dm_device_name(ti->table->md),
384 (unsigned long long)len, 388 (unsigned long long)len,
385 limits->logical_block_size, bdevname(bdev, b)); 389 limits->logical_block_size, bdevname(bdev, b));
386 return 0; 390 return 1;
387 } 391 }
388 392
389 return 1; 393 return 0;
390} 394}
391 395
392/* 396/*
@@ -496,8 +500,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
496 } 500 }
497 501
498 if (blk_stack_limits(limits, &q->limits, start << 9) < 0) 502 if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
499 DMWARN("%s: target device %s is misaligned", 503 DMWARN("%s: target device %s is misaligned: "
500 dm_device_name(ti->table->md), bdevname(bdev, b)); 504 "physical_block_size=%u, logical_block_size=%u, "
505 "alignment_offset=%u, start=%llu",
506 dm_device_name(ti->table->md), bdevname(bdev, b),
507 q->limits.physical_block_size,
508 q->limits.logical_block_size,
509 q->limits.alignment_offset,
510 (unsigned long long) start << 9);
511
501 512
502 /* 513 /*
503 * Check if merge fn is supported. 514 * Check if merge fn is supported.
@@ -698,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
698 709
699 if (remaining) { 710 if (remaining) {
700 DMWARN("%s: table line %u (start sect %llu len %llu) " 711 DMWARN("%s: table line %u (start sect %llu len %llu) "
701 "not aligned to h/w logical block size %hu", 712 "not aligned to h/w logical block size %u",
702 dm_device_name(table->md), i, 713 dm_device_name(table->md), i,
703 (unsigned long long) ti->begin, 714 (unsigned long long) ti->begin,
704 (unsigned long long) ti->len, 715 (unsigned long long) ti->len,
@@ -996,12 +1007,16 @@ int dm_calculate_queue_limits(struct dm_table *table,
996 ti->type->iterate_devices(ti, dm_set_device_limits, 1007 ti->type->iterate_devices(ti, dm_set_device_limits,
997 &ti_limits); 1008 &ti_limits);
998 1009
1010 /* Set I/O hints portion of queue limits */
1011 if (ti->type->io_hints)
1012 ti->type->io_hints(ti, &ti_limits);
1013
999 /* 1014 /*
1000 * Check each device area is consistent with the target's 1015 * Check each device area is consistent with the target's
1001 * overall queue limits. 1016 * overall queue limits.
1002 */ 1017 */
1003 if (!ti->type->iterate_devices(ti, device_area_is_valid, 1018 if (ti->type->iterate_devices(ti, device_area_is_invalid,
1004 &ti_limits)) 1019 &ti_limits))
1005 return -EINVAL; 1020 return -EINVAL;
1006 1021
1007combine_limits: 1022combine_limits:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8a311ea0d441..b4845b14740d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -738,16 +738,22 @@ static void rq_completed(struct mapped_device *md, int run_queue)
738 dm_put(md); 738 dm_put(md);
739} 739}
740 740
741static void free_rq_clone(struct request *clone)
742{
743 struct dm_rq_target_io *tio = clone->end_io_data;
744
745 blk_rq_unprep_clone(clone);
746 free_rq_tio(tio);
747}
748
741static void dm_unprep_request(struct request *rq) 749static void dm_unprep_request(struct request *rq)
742{ 750{
743 struct request *clone = rq->special; 751 struct request *clone = rq->special;
744 struct dm_rq_target_io *tio = clone->end_io_data;
745 752
746 rq->special = NULL; 753 rq->special = NULL;
747 rq->cmd_flags &= ~REQ_DONTPREP; 754 rq->cmd_flags &= ~REQ_DONTPREP;
748 755
749 blk_rq_unprep_clone(clone); 756 free_rq_clone(clone);
750 free_rq_tio(tio);
751} 757}
752 758
753/* 759/*
@@ -825,8 +831,7 @@ static void dm_end_request(struct request *clone, int error)
825 rq->sense_len = clone->sense_len; 831 rq->sense_len = clone->sense_len;
826 } 832 }
827 833
828 BUG_ON(clone->bio); 834 free_rq_clone(clone);
829 free_rq_tio(tio);
830 835
831 blk_end_request_all(rq, error); 836 blk_end_request_all(rq, error);
832 837
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..9dd872000cec 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
1974 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1975 mddev->events ++; 1976 mddev->events ++;
1976 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1977 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1978 if ((mddev->events&1)==0) { 1979 * to spares. */
1979 mddev->events++; 1980 if ((mddev->events&1)==0)
1980 nospares = 0; 1981 nospares = 0;
1981 }
1982 } else { 1982 } else {
1983 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1984 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1985 mddev->events++;
1986 nospares = 0; 1985 nospares = 0;
1987 }
1988 } 1986 }
1989 } 1987 }
1990 1988
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3601 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3602 return -EINVAL; 3600 return -EINVAL;
3603 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3604 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3605 return -EBUSY; 3604 return -EBUSY;
3606 3605
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4306 4305
4306 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4309 err = -EBUSY;
4310 } 4310 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4311
4314 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4366,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4366 if (mode == 1) 4364 if (mode == 1)
4367 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4367 err = 0;
4369 } 4368 }
4370 4369out:
4370 mutex_unlock(&mddev->open_mutex);
4371 if (err)
4372 return err;
4371 /* 4373 /*
4372 * Free resources if final stop 4374 * Free resources if final stop
4373 */ 4375 */
@@ -4433,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4435 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4436 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4437 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4438 return err;
4438} 4439}
4439 4440
@@ -5518,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5519 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5520 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5521
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5522 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5523 goto out;
5523 5524
5524 err = 0; 5525 err = 0;
5525 atomic_inc(&mddev->openers); 5526 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5527 mutex_unlock(&mddev->open_mutex);
5527 5528
5528 check_disk_change(bdev); 5529 check_disk_change(bdev);
5529 out: 5530 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* known to not need resync */ 225 int in_sync; /* known to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
4509 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4510 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4511 * from */ 4511 * from */
4512 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space is monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4513 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4514 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4515 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5078 mddev->degraded--; 5097 mddev->degraded--;
5079 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5080 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5081 d++) 5100 d++) {
5082 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5083 } 5109 }
5084 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5085 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;