author	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-06 14:34:04 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-06 14:34:04 -0500
commit	87c7ae06cc50bcbcdcc60d64a959ca0b9b71f892 (patch)
tree	14753708a10c8bf65517056cd3fc87a22a03eda9
parent	dff6d1c5ef9116a4478908001d72ee67127ecf01 (diff)
parent	f070304094edb8d516423e79edd27c97ec2020b0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm raid1: fix deadlock when suspending failed device
  dm: eliminate some holes data structures
  dm ioctl: introduce flag indicating uevent was generated
  dm: free dm_io before bio_endio not after
  dm table: remove unused dm_get_device range parameters
  dm ioctl: only issue uevent on resume if state changed
  dm raid1: always return error if all legs fail
  dm mpath: refactor pg_init
  dm mpath: wait for pg_init completion when suspending
  dm mpath: hold io until all pg_inits completed
  dm mpath: avoid storing private suspended state
  dm: document when snapshot has finished merging
  dm table: remove dm_get from dm_table_get_md
  dm mpath: skip activate_path for failed paths
  dm mpath: pass struct pgpath to pg init done
-rw-r--r--	Documentation/device-mapper/snapshot.txt	44
-rw-r--r--	drivers/md/dm-crypt.c	3
-rw-r--r--	drivers/md/dm-delay.c	8
-rw-r--r--	drivers/md/dm-ioctl.c	24
-rw-r--r--	drivers/md/dm-linear.c	3
-rw-r--r--	drivers/md/dm-log.c	3
-rw-r--r--	drivers/md/dm-mpath.c	111
-rw-r--r--	drivers/md/dm-raid1.c	53
-rw-r--r--	drivers/md/dm-snap.c	34
-rw-r--r--	drivers/md/dm-stripe.c	3
-rw-r--r--	drivers/md/dm-table.c	12
-rw-r--r--	drivers/md/dm-uevent.c	7
-rw-r--r--	drivers/md/dm.c	25
-rw-r--r--	drivers/md/dm.h	4
-rw-r--r--	include/linux/device-mapper.h	5
-rw-r--r--	include/linux/dm-io.h	4
-rw-r--r--	include/linux/dm-ioctl.h	9
17 files changed, 212 insertions, 140 deletions
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
index e3a77b215135..0d5bc46dc167 100644
--- a/Documentation/device-mapper/snapshot.txt
+++ b/Documentation/device-mapper/snapshot.txt
@@ -122,3 +122,47 @@ volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
 brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
 brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
 brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
+
+
+How to determine when a merging is complete
+===========================================
+The snapshot-merge and snapshot status lines end with:
+  <sectors_allocated>/<total_sectors> <metadata_sectors>
+
+Both <sectors_allocated> and <total_sectors> include both data and metadata.
+During merging, the number of sectors allocated gets smaller and
+smaller. Merging has finished when the number of sectors holding data
+is zero, in other words <sectors_allocated> == <metadata_sectors>.
+
+Here is a practical example (using a hybrid of lvm and dmsetup commands):
+
+# lvs
+  LV          VG          Attr   LSize  Origin Snap%  Move Log Copy%  Convert
+  base        volumeGroup owi-a- 4.00g
+  snap        volumeGroup swi-a- 1.00g  base   18.97
+
+# dmsetup status volumeGroup-snap
+0 8388608 snapshot 397896/2097152 1560
+                                  ^^^^ metadata sectors
+
+# lvconvert --merge -b volumeGroup/snap
+  Merging of volume snap started.
+
+# lvs volumeGroup/snap
+  LV          VG          Attr   LSize  Origin Snap%  Move Log Copy%  Convert
+  base        volumeGroup Owi-a- 4.00g         17.23
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 281688/2097152 1104
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 180480/2097152 712
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 16/2097152 16
+
+Merging has finished.
+
+# lvs
+  LV          VG          Attr   LSize  Origin Snap%  Move Log Copy%  Convert
+  base        volumeGroup owi-a- 4.00g
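
[The completion test documented above reduces to comparing the two trailing
status fields. As a rough illustration only (not part of this patch), a
userspace check could parse the dmsetup output like this, assuming dmsetup
is in PATH and emits the status format shown above:

/*
 * Sketch: report whether a snapshot merge has finished, i.e. whether
 * <sectors_allocated> == <metadata_sectors> on the status line.
 */
#include <stdio.h>
#include <string.h>

static int merge_finished(const char *dev)
{
	char cmd[256], line[256], target[64];
	unsigned long long start, len, allocated, total, metadata;
	FILE *f;

	snprintf(cmd, sizeof(cmd), "dmsetup status %s", dev);
	f = popen(cmd, "r");
	if (!f)
		return -1;
	if (!fgets(line, sizeof(line), f)) {
		pclose(f);
		return -1;
	}
	pclose(f);

	/* e.g. "0 8388608 snapshot-merge 16/2097152 16" */
	if (sscanf(line, "%llu %llu %63s %llu/%llu %llu",
		   &start, &len, target, &allocated, &total, &metadata) != 6)
		return -1;
	if (strcmp(target, "snapshot-merge"))
		return -1;

	return allocated == metadata;
}

int main(int argc, char **argv)
{
	int r = merge_finished(argc > 1 ? argv[1] : "volumeGroup-base");

	if (r < 0)
		return 1;
	printf(r ? "merge finished\n" : "merge still in progress\n");
	return 0;
}

The "volumeGroup-base" default mirrors the documentation example only.]
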
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a93637223c8d..3bdbb6115702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1160,8 +1160,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	cc->start = tmpll;
 
-	if (dm_get_device(ti, argv[3], cc->start, ti->len,
-			  dm_table_get_mode(ti->table), &cc->dev)) {
+	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
 		goto bad_device;
 	}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index ebe7381f47c8..852052880d7a 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -156,8 +156,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	if (dm_get_device(ti, argv[0], dc->start_read, ti->len,
-			  dm_table_get_mode(ti->table), &dc->dev_read)) {
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			  &dc->dev_read)) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
@@ -177,8 +177,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_dev_read;
 	}
 
-	if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
-			  dm_table_get_mode(ti->table), &dc->dev_write)) {
+	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
+			  &dc->dev_write)) {
 		ti->error = "Write device lookup failed";
 		goto bad_dev_read;
 	}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1d669322b27c..d7500e1c26f2 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -285,7 +285,8 @@ retry:
 	up_write(&_hash_lock);
 }
 
-static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
+			  const char *new)
 {
 	char *new_name, *old_name;
 	struct hash_cell *hc;
@@ -344,7 +345,8 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
 		dm_table_put(table);
 	}
 
-	dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
+	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie))
+		*flags |= DM_UEVENT_GENERATED_FLAG;
 
 	dm_put(hc->md);
 	up_write(&_hash_lock);
@@ -736,10 +738,10 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 	__hash_remove(hc);
 	up_write(&_hash_lock);
 
-	dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
+		param->flags |= DM_UEVENT_GENERATED_FLAG;
 
 	dm_put(md);
-	param->data_size = 0;
 	return 0;
 }
 
@@ -773,7 +775,9 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 		return r;
 
 	param->data_size = 0;
-	return dm_hash_rename(param->event_nr, param->name, new_name);
+
+	return dm_hash_rename(param->event_nr, &param->flags, param->name,
+			      new_name);
 }
 
 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -897,16 +901,17 @@ static int do_resume(struct dm_ioctl *param)
 		set_disk_ro(dm_disk(md), 1);
 	}
 
-	if (dm_suspended_md(md))
+	if (dm_suspended_md(md)) {
 		r = dm_resume(md);
+		if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
+			param->flags |= DM_UEVENT_GENERATED_FLAG;
+	}
 
 	if (old_map)
 		dm_table_destroy(old_map);
 
-	if (!r) {
-		dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
+	if (!r)
 		r = __dev_status(md, param);
-	}
 
 	dm_put(md);
 	return r;
@@ -1476,6 +1481,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 {
 	/* Always clear this flag */
 	param->flags &= ~DM_BUFFER_FULL_FLAG;
+	param->flags &= ~DM_UEVENT_GENERATED_FLAG;
 
 	/* Ignores parameters */
 	if (cmd == DM_REMOVE_ALL_CMD ||
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 82f7d6e6b1ea..9200dbf2391a 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -47,8 +47,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	lc->start = tmp;
 
-	if (dm_get_device(ti, argv[0], lc->start, ti->len,
-			  dm_table_get_mode(ti->table), &lc->dev)) {
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) {
 		ti->error = "dm-linear: Device lookup failed";
 		goto bad;
 	}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 7035582786fb..5a08be0222db 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -543,8 +543,7 @@ static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */,
-			  FMODE_READ | FMODE_WRITE, &dev);
+	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev);
 	if (r)
 		return r;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e81345a1d08f..826bce7343b3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -69,6 +69,7 @@ struct multipath {
 	struct list_head priority_groups;
 	unsigned pg_init_required;	/* pg_init needs calling? */
 	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
+	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */
 
 	unsigned nr_valid_paths;	/* Total number of usable paths */
 	struct pgpath *current_pgpath;
@@ -95,8 +96,6 @@ struct multipath {
 	mempool_t *mpio_pool;
 
 	struct mutex work_mutex;
-
-	unsigned suspended;	/* Don't create new I/O internally when set. */
 };
 
 /*
@@ -202,6 +201,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 		m->queue_io = 1;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios);
 		INIT_WORK(&m->trigger_event, trigger_event);
+		init_waitqueue_head(&m->pg_init_wait);
 		mutex_init(&m->work_mutex);
 		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
 		if (!m->mpio_pool) {
@@ -235,6 +235,21 @@ static void free_multipath(struct multipath *m)
  * Path selection
  *-----------------------------------------------*/
 
+static void __pg_init_all_paths(struct multipath *m)
+{
+	struct pgpath *pgpath;
+
+	m->pg_init_count++;
+	m->pg_init_required = 0;
+	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
+		/* Skip failed paths */
+		if (!pgpath->is_active)
+			continue;
+		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
+			m->pg_init_in_progress++;
+	}
+}
+
 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 {
 	m->current_pg = pgpath->pg;
@@ -439,7 +454,7 @@ static void process_queued_ios(struct work_struct *work)
 {
 	struct multipath *m =
 		container_of(work, struct multipath, process_queued_ios);
-	struct pgpath *pgpath = NULL, *tmp;
+	struct pgpath *pgpath = NULL;
 	unsigned must_queue = 1;
 	unsigned long flags;
 
@@ -457,14 +472,9 @@ static void process_queued_ios(struct work_struct *work)
 	    (!pgpath && !m->queue_if_no_path))
 		must_queue = 0;
 
-	if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
-		m->pg_init_count++;
-		m->pg_init_required = 0;
-		list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
-			if (queue_work(kmpath_handlerd, &tmp->activate_path))
-				m->pg_init_in_progress++;
-		}
-	}
+	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
+		__pg_init_all_paths(m);
+
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
 	if (!must_queue)
@@ -597,8 +607,8 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
-	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
-			  dm_table_get_mode(ti->table), &p->path.dev);
+	r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+			  &p->path.dev);
 	if (r) {
 		ti->error = "error getting device";
 		goto bad;
@@ -890,9 +900,34 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	return r;
 }
 
-static void flush_multipath_work(void)
+static void multipath_wait_for_pg_init_completion(struct multipath *m)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	unsigned long flags;
+
+	add_wait_queue(&m->pg_init_wait, &wait);
+
+	while (1) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irqsave(&m->lock, flags);
+		if (!m->pg_init_in_progress) {
+			spin_unlock_irqrestore(&m->lock, flags);
+			break;
+		}
+		spin_unlock_irqrestore(&m->lock, flags);
+
+		io_schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	remove_wait_queue(&m->pg_init_wait, &wait);
+}
+
+static void flush_multipath_work(struct multipath *m)
 {
 	flush_workqueue(kmpath_handlerd);
+	multipath_wait_for_pg_init_completion(m);
 	flush_workqueue(kmultipathd);
 	flush_scheduled_work();
 }
@@ -901,7 +936,7 @@ static void multipath_dtr(struct dm_target *ti)
 {
 	struct multipath *m = ti->private;
 
-	flush_multipath_work();
+	flush_multipath_work(m);
 	free_multipath(m);
 }
 
@@ -1128,8 +1163,7 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
 
 static void pg_init_done(void *data, int errors)
 {
-	struct dm_path *path = data;
-	struct pgpath *pgpath = path_to_pgpath(path);
+	struct pgpath *pgpath = data;
 	struct priority_group *pg = pgpath->pg;
 	struct multipath *m = pg->m;
 	unsigned long flags;
@@ -1143,8 +1177,8 @@ static void pg_init_done(void *data, int errors)
 			errors = 0;
 			break;
 		}
-		DMERR("Cannot failover device because scsi_dh_%s was not "
-		      "loaded.", m->hw_handler_name);
+		DMERR("Could not failover the device: Handler scsi_dh_%s "
+		      "Error %d.", m->hw_handler_name, errors);
 		/*
 		 * Fail path for now, so we do not ping pong
 		 */
@@ -1181,14 +1215,24 @@ static void pg_init_done(void *data, int errors)
 			m->current_pgpath = NULL;
 			m->current_pg = NULL;
 		}
-	} else if (!m->pg_init_required) {
-		m->queue_io = 0;
+	} else if (!m->pg_init_required)
 		pg->bypassed = 0;
-	}
 
-	m->pg_init_in_progress--;
-	if (!m->pg_init_in_progress)
-		queue_work(kmultipathd, &m->process_queued_ios);
+	if (--m->pg_init_in_progress)
+		/* Activations of other paths are still on going */
+		goto out;
+
+	if (!m->pg_init_required)
+		m->queue_io = 0;
+
+	queue_work(kmultipathd, &m->process_queued_ios);
+
+	/*
+	 * Wake up any thread waiting to suspend.
+	 */
+	wake_up(&m->pg_init_wait);
+
+out:
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
@@ -1198,7 +1242,7 @@ static void activate_path(struct work_struct *work)
 		container_of(work, struct pgpath, activate_path);
 
 	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
-			 pg_init_done, &pgpath->path);
+			 pg_init_done, pgpath);
 }
 
 /*
@@ -1276,8 +1320,7 @@ static void multipath_postsuspend(struct dm_target *ti)
 	struct multipath *m = ti->private;
 
 	mutex_lock(&m->work_mutex);
-	m->suspended = 1;
-	flush_multipath_work();
+	flush_multipath_work(m);
 	mutex_unlock(&m->work_mutex);
 }
 
@@ -1289,10 +1332,6 @@ static void multipath_resume(struct dm_target *ti)
 	struct multipath *m = (struct multipath *) ti->private;
 	unsigned long flags;
 
-	mutex_lock(&m->work_mutex);
-	m->suspended = 0;
-	mutex_unlock(&m->work_mutex);
-
 	spin_lock_irqsave(&m->lock, flags);
 	m->queue_if_no_path = m->saved_queue_if_no_path;
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -1428,11 +1467,6 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 
 	mutex_lock(&m->work_mutex);
 
-	if (m->suspended) {
-		r = -EBUSY;
-		goto out;
-	}
-
 	if (dm_suspended(ti)) {
 		r = -EBUSY;
 		goto out;
@@ -1471,8 +1505,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 		goto out;
 	}
 
-	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
-			  dm_table_get_mode(ti->table), &dev);
+	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
 	if (r) {
 		DMWARN("message: error getting device %s",
 		       argv[1]);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6c1046df81f6..ddda531723dc 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -465,9 +465,17 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
 static void hold_bio(struct mirror_set *ms, struct bio *bio)
 {
 	/*
-	 * If device is suspended, complete the bio.
+	 * Lock is required to avoid race condition during suspend
+	 * process.
 	 */
+	spin_lock_irq(&ms->lock);
+
 	if (atomic_read(&ms->suspend)) {
+		spin_unlock_irq(&ms->lock);
+
+		/*
+		 * If device is suspended, complete the bio.
+		 */
 		if (dm_noflush_suspending(ms->ti))
 			bio_endio(bio, DM_ENDIO_REQUEUE);
 		else
@@ -478,7 +486,6 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
 	/*
 	 * Hold bio until the suspend is complete.
 	 */
-	spin_lock_irq(&ms->lock);
 	bio_list_add(&ms->holds, bio);
 	spin_unlock_irq(&ms->lock);
 }
@@ -737,9 +744,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 			dm_rh_delay(ms->rh, bio);
 
 	while ((bio = bio_list_pop(&nosync))) {
-		if (unlikely(ms->leg_failure) && errors_handled(ms))
-			hold_bio(ms, bio);
-		else {
+		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
+			spin_lock_irq(&ms->lock);
+			bio_list_add(&ms->failures, bio);
+			spin_unlock_irq(&ms->lock);
+			wakeup_mirrord(ms);
+		} else {
 			map_bio(get_default_mirror(ms), bio);
 			generic_make_request(bio);
 		}
@@ -917,8 +927,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		return -EINVAL;
 	}
 
-	if (dm_get_device(ti, argv[0], offset, ti->len,
-			  dm_table_get_mode(ti->table),
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 			  &ms->mirror[mirror].dev)) {
 		ti->error = "Device lookup failure";
 		return -ENXIO;
@@ -1259,6 +1268,20 @@ static void mirror_presuspend(struct dm_target *ti)
 	atomic_set(&ms->suspend, 1);
 
 	/*
+	 * Process bios in the hold list to start recovery waiting
+	 * for bios in the hold list. After the process, no bio has
+	 * a chance to be added in the hold list because ms->suspend
+	 * is set.
+	 */
+	spin_lock_irq(&ms->lock);
+	holds = ms->holds;
+	bio_list_init(&ms->holds);
+	spin_unlock_irq(&ms->lock);
+
+	while ((bio = bio_list_pop(&holds)))
+		hold_bio(ms, bio);
+
+	/*
 	 * We must finish up all the work that we've
 	 * generated (i.e. recovery work).
 	 */
@@ -1278,22 +1301,6 @@ static void mirror_presuspend(struct dm_target *ti)
 	 * we know that all of our I/O has been pushed.
 	 */
 	flush_workqueue(ms->kmirrord_wq);
-
-	/*
-	 * Now set ms->suspend is set and the workqueue flushed, no more
-	 * entries can be added to ms->hold list, so process it.
-	 *
-	 * Bios can still arrive concurrently with or after this
-	 * presuspend function, but they cannot join the hold list
-	 * because ms->suspend is set.
-	 */
-	spin_lock_irq(&ms->lock);
-	holds = ms->holds;
-	bio_list_init(&ms->holds);
-	spin_unlock_irq(&ms->lock);
-
-	while ((bio = bio_list_pop(&holds)))
-		hold_bio(ms, bio);
 }
 
 static void mirror_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ee8eb283650d..54853773510c 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -83,10 +83,10 @@ struct dm_snapshot {
 	/* Whether or not owning mapped_device is suspended */
 	int suspended;
 
-	mempool_t *pending_pool;
-
 	atomic_t pending_exceptions_count;
 
+	mempool_t *pending_pool;
+
 	struct dm_exception_table pending;
 	struct dm_exception_table complete;
 
@@ -96,6 +96,11 @@ struct dm_snapshot {
 	 */
 	spinlock_t pe_lock;
 
+	/* Chunks with outstanding reads */
+	spinlock_t tracked_chunk_lock;
+	mempool_t *tracked_chunk_pool;
+	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
 	/* The on disk metadata handler */
 	struct dm_exception_store *store;
 
@@ -105,10 +110,12 @@ struct dm_snapshot {
 	struct bio_list queued_bios;
 	struct work_struct queued_bios_work;
 
-	/* Chunks with outstanding reads */
-	mempool_t *tracked_chunk_pool;
-	spinlock_t tracked_chunk_lock;
-	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+	/* Wait for events based on state_bits */
+	unsigned long state_bits;
+
+	/* Range of chunks currently being merged. */
+	chunk_t first_merging_chunk;
+	int num_merging_chunks;
 
 	/*
 	 * The merge operation failed if this flag is set.
@@ -125,13 +132,6 @@ struct dm_snapshot {
 	 */
 	int merge_failed;
 
-	/* Wait for events based on state_bits */
-	unsigned long state_bits;
-
-	/* Range of chunks currently being merged. */
-	chunk_t first_merging_chunk;
-	int num_merging_chunks;
-
 	/*
 	 * Incoming bios that overlap with chunks being merged must wait
 	 * for them to be committed.
@@ -1081,8 +1081,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	argv++;
 	argc--;
 
-	r = dm_get_device(ti, cow_path, 0, 0,
-			  FMODE_READ | FMODE_WRITE, &s->cow);
+	r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow);
 	if (r) {
 		ti->error = "Cannot get COW device";
 		goto bad_cow;
@@ -1098,7 +1097,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	argv += args_used;
 	argc -= args_used;
 
-	r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin);
+	r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
 	if (r) {
 		ti->error = "Cannot get origin device";
 		goto bad_origin;
@@ -2100,8 +2099,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], 0, ti->len,
-			  dm_table_get_mode(ti->table), &dev);
+	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev);
 	if (r) {
 		ti->error = "Cannot get target device";
 		return r;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index bd58703ee8f6..e610725db766 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -80,8 +80,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
 	if (sscanf(argv[1], "%llu", &start) != 1)
 		return -EINVAL;
 
-	if (dm_get_device(ti, argv[0], start, sc->stripe_width,
-			  dm_table_get_mode(ti->table),
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 			  &sc->stripe[stripe].dev))
 		return -ENXIO;
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b22feb01a0c..9924ea23032d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -429,8 +429,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
  * it's already present.
  */
 static int __table_get_device(struct dm_table *t, struct dm_target *ti,
-			      const char *path, sector_t start, sector_t len,
-			      fmode_t mode, struct dm_dev **result)
+			      const char *path, fmode_t mode, struct dm_dev **result)
 {
 	int r;
 	dev_t uninitialized_var(dev);
@@ -527,11 +526,10 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
-int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
-		  sector_t len, fmode_t mode, struct dm_dev **result)
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+		  struct dm_dev **result)
 {
-	return __table_get_device(ti->table, ti, path,
-				  start, len, mode, result);
+	return __table_get_device(ti->table, ti, path, mode, result);
 }
 
 
@@ -1231,8 +1229,6 @@ void dm_table_unplug_all(struct dm_table *t)
 
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
-	dm_get(t->md);
-
 	return t->md;
 }
 
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
index c7c555a8c7b2..6b1e3b61b25e 100644
--- a/drivers/md/dm-uevent.c
+++ b/drivers/md/dm-uevent.c
@@ -187,7 +187,7 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
 
 	if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
 		DMERR("%s: Invalid event_type %d", __func__, event_type);
-		goto out;
+		return;
 	}
 
 	event = dm_build_path_uevent(md, ti,
@@ -195,12 +195,9 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
 				     _dm_uevent_type_names[event_type].name,
 				     path, nr_valid_paths);
 	if (IS_ERR(event))
-		goto out;
+		return;
 
 	dm_uevent_add(md, &event->elist);
-
-out:
-	dm_put(md);
 }
 EXPORT_SYMBOL_GPL(dm_path_uevent);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index aa4e2aa86d49..d21e1284604f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -635,8 +635,10 @@ static void dec_pending(struct dm_io *io, int error)
 			if (!md->barrier_error && io_error != -EOPNOTSUPP)
 				md->barrier_error = io_error;
 			end_io_acct(io);
+			free_io(md, io);
 		} else {
 			end_io_acct(io);
+			free_io(md, io);
 
 			if (io_error != DM_ENDIO_REQUEUE) {
 				trace_block_bio_complete(md->queue, bio);
@@ -644,8 +646,6 @@ static void dec_pending(struct dm_io *io, int error)
 				bio_endio(bio, io_error);
 			}
 		}
-
-		free_io(md, io);
 	}
 }
 
@@ -2618,18 +2618,19 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
-void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
-		       unsigned cookie)
+int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+		      unsigned cookie)
 {
 	char udev_cookie[DM_COOKIE_LENGTH];
 	char *envp[] = { udev_cookie, NULL };
 
 	if (!cookie)
-		kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 	else {
 		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
 			 DM_COOKIE_ENV_VAR_NAME, cookie);
-		kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
+		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+					  action, envp);
 	}
 }
 
@@ -2699,23 +2700,13 @@ int dm_suspended_md(struct mapped_device *md)
 
 int dm_suspended(struct dm_target *ti)
 {
-	struct mapped_device *md = dm_table_get_md(ti->table);
-	int r = dm_suspended_md(md);
-
-	dm_put(md);
-
-	return r;
+	return dm_suspended_md(dm_table_get_md(ti->table));
 }
 EXPORT_SYMBOL_GPL(dm_suspended);
 
 int dm_noflush_suspending(struct dm_target *ti)
 {
-	struct mapped_device *md = dm_table_get_md(ti->table);
-	int r = __noflush_suspending(md);
-
-	dm_put(md);
-
-	return r;
+	return __noflush_suspending(dm_table_get_md(ti->table));
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 8dadaa5bc396..bad1724d4869 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -125,8 +125,8 @@ void dm_stripe_exit(void);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
-void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
-		       unsigned cookie);
+int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+		      unsigned cookie);
 
 int dm_io_init(void);
 void dm_io_exit(void);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d4c9c0b88adc..1381cd97b4ed 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -118,10 +118,9 @@ struct dm_dev {
 /*
  * Constructors should call these functions to ensure destination devices
  * are opened/closed correctly.
- * FIXME: too many arguments.
  */
-int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
-		  sector_t len, fmode_t mode, struct dm_dev **result);
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+		  struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
 /*
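
[For reference, a target constructor calls the narrowed dm_get_device()
declared above roughly as follows. This is an illustrative sketch only;
the "example" names are hypothetical, not from the patch, and real
callers look like the dm-linear hunk earlier in this diff:

#include <linux/device-mapper.h>
#include <linux/slab.h>

struct example_c {
	struct dm_dev *dev;
};

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct example_c *ec;

	if (argc != 1) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	ec = kmalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec)
		return -ENOMEM;

	/* The unused start/len range parameters are gone; only mode remains. */
	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			  &ec->dev)) {
		ti->error = "Device lookup failed";
		kfree(ec);
		return -ENXIO;
	}

	ti->private = ec;
	return 0;	/* the dtr releases the device with dm_put_device() */
}]
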
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index b6bf17ee2f61..5c9186b93fff 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -37,14 +37,14 @@ enum dm_io_mem_type {
 struct dm_io_memory {
 	enum dm_io_mem_type type;
 
+	unsigned offset;
+
 	union {
 		struct page_list *pl;
 		struct bio_vec *bvec;
 		void *vma;
 		void *addr;
 	} ptr;
-
-	unsigned offset;
 };
 
 struct dm_io_notify {
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index aa95508d2f95..2c445e113790 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -266,9 +266,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	16
+#define DM_VERSION_MINOR	17
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2009-11-05)"
+#define DM_VERSION_EXTRA	"-ioctl (2010-03-05)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -316,4 +316,9 @@ enum {
  */
 #define DM_QUERY_INACTIVE_TABLE_FLAG	(1 << 12) /* In */
 
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG	(1 << 13) /* Out */
+
 #endif /* _LINUX_DM_IOCTL_H */
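
[Userspace that issues device-mapper ioctls can consult the new flag to
decide whether a uevent is worth waiting for. A hedged sketch, assuming a
struct dm_ioctl already populated with the version fields and device name;
real callers go through libdevmapper rather than raw ioctls:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dm-ioctl.h>

static int remove_device(struct dm_ioctl *io)
{
	int fd = open("/dev/mapper/control", O_RDWR);

	if (fd < 0)
		return -1;

	if (ioctl(fd, DM_DEV_REMOVE, io) < 0) {
		close(fd);
		return -1;
	}
	close(fd);

	/* Only wait for udev processing when the kernel says it fired one. */
	if (io->flags & DM_UEVENT_GENERATED_FLAG)
		printf("uevent generated; wait on the udev cookie\n");
	else
		printf("no uevent generated; skip the udev wait\n");

	return 0;
}]
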