aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-mpath.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-mpath.c')
-rw-r--r--drivers/md/dm-mpath.c334
1 files changed, 220 insertions, 114 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6a386ab4f7eb..c70604a20897 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -8,7 +8,6 @@
8#include <linux/device-mapper.h> 8#include <linux/device-mapper.h>
9 9
10#include "dm-path-selector.h" 10#include "dm-path-selector.h"
11#include "dm-bio-record.h"
12#include "dm-uevent.h" 11#include "dm-uevent.h"
13 12
14#include <linux/ctype.h> 13#include <linux/ctype.h>
@@ -35,6 +34,7 @@ struct pgpath {
35 34
36 struct dm_path path; 35 struct dm_path path;
37 struct work_struct deactivate_path; 36 struct work_struct deactivate_path;
37 struct work_struct activate_path;
38}; 38};
39 39
40#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) 40#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +64,6 @@ struct multipath {
64 spinlock_t lock; 64 spinlock_t lock;
65 65
66 const char *hw_handler_name; 66 const char *hw_handler_name;
67 struct work_struct activate_path;
68 struct pgpath *pgpath_to_activate;
69 unsigned nr_priority_groups; 67 unsigned nr_priority_groups;
70 struct list_head priority_groups; 68 struct list_head priority_groups;
71 unsigned pg_init_required; /* pg_init needs calling? */ 69 unsigned pg_init_required; /* pg_init needs calling? */
@@ -84,7 +82,7 @@ struct multipath {
84 unsigned pg_init_count; /* Number of times pg_init called */ 82 unsigned pg_init_count; /* Number of times pg_init called */
85 83
86 struct work_struct process_queued_ios; 84 struct work_struct process_queued_ios;
87 struct bio_list queued_ios; 85 struct list_head queued_ios;
88 unsigned queue_size; 86 unsigned queue_size;
89 87
90 struct work_struct trigger_event; 88 struct work_struct trigger_event;
@@ -101,7 +99,7 @@ struct multipath {
101 */ 99 */
102struct dm_mpath_io { 100struct dm_mpath_io {
103 struct pgpath *pgpath; 101 struct pgpath *pgpath;
104 struct dm_bio_details details; 102 size_t nr_bytes;
105}; 103};
106 104
107typedef int (*action_fn) (struct pgpath *pgpath); 105typedef int (*action_fn) (struct pgpath *pgpath);
@@ -128,6 +126,7 @@ static struct pgpath *alloc_pgpath(void)
128 if (pgpath) { 126 if (pgpath) {
129 pgpath->is_active = 1; 127 pgpath->is_active = 1;
130 INIT_WORK(&pgpath->deactivate_path, deactivate_path); 128 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
129 INIT_WORK(&pgpath->activate_path, activate_path);
131 } 130 }
132 131
133 return pgpath; 132 return pgpath;
@@ -160,7 +159,6 @@ static struct priority_group *alloc_priority_group(void)
160 159
161static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) 160static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
162{ 161{
163 unsigned long flags;
164 struct pgpath *pgpath, *tmp; 162 struct pgpath *pgpath, *tmp;
165 struct multipath *m = ti->private; 163 struct multipath *m = ti->private;
166 164
@@ -169,10 +167,6 @@ static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
169 if (m->hw_handler_name) 167 if (m->hw_handler_name)
170 scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); 168 scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
171 dm_put_device(ti, pgpath->path.dev); 169 dm_put_device(ti, pgpath->path.dev);
172 spin_lock_irqsave(&m->lock, flags);
173 if (m->pgpath_to_activate == pgpath)
174 m->pgpath_to_activate = NULL;
175 spin_unlock_irqrestore(&m->lock, flags);
176 free_pgpath(pgpath); 170 free_pgpath(pgpath);
177 } 171 }
178} 172}
@@ -198,11 +192,11 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
198 m = kzalloc(sizeof(*m), GFP_KERNEL); 192 m = kzalloc(sizeof(*m), GFP_KERNEL);
199 if (m) { 193 if (m) {
200 INIT_LIST_HEAD(&m->priority_groups); 194 INIT_LIST_HEAD(&m->priority_groups);
195 INIT_LIST_HEAD(&m->queued_ios);
201 spin_lock_init(&m->lock); 196 spin_lock_init(&m->lock);
202 m->queue_io = 1; 197 m->queue_io = 1;
203 INIT_WORK(&m->process_queued_ios, process_queued_ios); 198 INIT_WORK(&m->process_queued_ios, process_queued_ios);
204 INIT_WORK(&m->trigger_event, trigger_event); 199 INIT_WORK(&m->trigger_event, trigger_event);
205 INIT_WORK(&m->activate_path, activate_path);
206 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); 200 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
207 if (!m->mpio_pool) { 201 if (!m->mpio_pool) {
208 kfree(m); 202 kfree(m);
@@ -250,11 +244,12 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
250 m->pg_init_count = 0; 244 m->pg_init_count = 0;
251} 245}
252 246
253static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) 247static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
248 size_t nr_bytes)
254{ 249{
255 struct dm_path *path; 250 struct dm_path *path;
256 251
257 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count); 252 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
258 if (!path) 253 if (!path)
259 return -ENXIO; 254 return -ENXIO;
260 255
@@ -266,7 +261,7 @@ static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
266 return 0; 261 return 0;
267} 262}
268 263
269static void __choose_pgpath(struct multipath *m) 264static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
270{ 265{
271 struct priority_group *pg; 266 struct priority_group *pg;
272 unsigned bypassed = 1; 267 unsigned bypassed = 1;
@@ -278,12 +273,12 @@ static void __choose_pgpath(struct multipath *m)
278 if (m->next_pg) { 273 if (m->next_pg) {
279 pg = m->next_pg; 274 pg = m->next_pg;
280 m->next_pg = NULL; 275 m->next_pg = NULL;
281 if (!__choose_path_in_pg(m, pg)) 276 if (!__choose_path_in_pg(m, pg, nr_bytes))
282 return; 277 return;
283 } 278 }
284 279
285 /* Don't change PG until it has no remaining paths */ 280 /* Don't change PG until it has no remaining paths */
286 if (m->current_pg && !__choose_path_in_pg(m, m->current_pg)) 281 if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
287 return; 282 return;
288 283
289 /* 284 /*
@@ -295,7 +290,7 @@ static void __choose_pgpath(struct multipath *m)
295 list_for_each_entry(pg, &m->priority_groups, list) { 290 list_for_each_entry(pg, &m->priority_groups, list) {
296 if (pg->bypassed == bypassed) 291 if (pg->bypassed == bypassed)
297 continue; 292 continue;
298 if (!__choose_path_in_pg(m, pg)) 293 if (!__choose_path_in_pg(m, pg, nr_bytes))
299 return; 294 return;
300 } 295 }
301 } while (bypassed--); 296 } while (bypassed--);
@@ -322,19 +317,21 @@ static int __must_push_back(struct multipath *m)
322 dm_noflush_suspending(m->ti)); 317 dm_noflush_suspending(m->ti));
323} 318}
324 319
325static int map_io(struct multipath *m, struct bio *bio, 320static int map_io(struct multipath *m, struct request *clone,
326 struct dm_mpath_io *mpio, unsigned was_queued) 321 struct dm_mpath_io *mpio, unsigned was_queued)
327{ 322{
328 int r = DM_MAPIO_REMAPPED; 323 int r = DM_MAPIO_REMAPPED;
324 size_t nr_bytes = blk_rq_bytes(clone);
329 unsigned long flags; 325 unsigned long flags;
330 struct pgpath *pgpath; 326 struct pgpath *pgpath;
327 struct block_device *bdev;
331 328
332 spin_lock_irqsave(&m->lock, flags); 329 spin_lock_irqsave(&m->lock, flags);
333 330
334 /* Do we need to select a new pgpath? */ 331 /* Do we need to select a new pgpath? */
335 if (!m->current_pgpath || 332 if (!m->current_pgpath ||
336 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0))) 333 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
337 __choose_pgpath(m); 334 __choose_pgpath(m, nr_bytes);
338 335
339 pgpath = m->current_pgpath; 336 pgpath = m->current_pgpath;
340 337
@@ -344,21 +341,28 @@ static int map_io(struct multipath *m, struct bio *bio,
344 if ((pgpath && m->queue_io) || 341 if ((pgpath && m->queue_io) ||
345 (!pgpath && m->queue_if_no_path)) { 342 (!pgpath && m->queue_if_no_path)) {
346 /* Queue for the daemon to resubmit */ 343 /* Queue for the daemon to resubmit */
347 bio_list_add(&m->queued_ios, bio); 344 list_add_tail(&clone->queuelist, &m->queued_ios);
348 m->queue_size++; 345 m->queue_size++;
349 if ((m->pg_init_required && !m->pg_init_in_progress) || 346 if ((m->pg_init_required && !m->pg_init_in_progress) ||
350 !m->queue_io) 347 !m->queue_io)
351 queue_work(kmultipathd, &m->process_queued_ios); 348 queue_work(kmultipathd, &m->process_queued_ios);
352 pgpath = NULL; 349 pgpath = NULL;
353 r = DM_MAPIO_SUBMITTED; 350 r = DM_MAPIO_SUBMITTED;
354 } else if (pgpath) 351 } else if (pgpath) {
355 bio->bi_bdev = pgpath->path.dev->bdev; 352 bdev = pgpath->path.dev->bdev;
356 else if (__must_push_back(m)) 353 clone->q = bdev_get_queue(bdev);
354 clone->rq_disk = bdev->bd_disk;
355 } else if (__must_push_back(m))
357 r = DM_MAPIO_REQUEUE; 356 r = DM_MAPIO_REQUEUE;
358 else 357 else
359 r = -EIO; /* Failed */ 358 r = -EIO; /* Failed */
360 359
361 mpio->pgpath = pgpath; 360 mpio->pgpath = pgpath;
361 mpio->nr_bytes = nr_bytes;
362
363 if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
364 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
365 nr_bytes);
362 366
363 spin_unlock_irqrestore(&m->lock, flags); 367 spin_unlock_irqrestore(&m->lock, flags);
364 368
@@ -396,30 +400,31 @@ static void dispatch_queued_ios(struct multipath *m)
396{ 400{
397 int r; 401 int r;
398 unsigned long flags; 402 unsigned long flags;
399 struct bio *bio = NULL, *next;
400 struct dm_mpath_io *mpio; 403 struct dm_mpath_io *mpio;
401 union map_info *info; 404 union map_info *info;
405 struct request *clone, *n;
406 LIST_HEAD(cl);
402 407
403 spin_lock_irqsave(&m->lock, flags); 408 spin_lock_irqsave(&m->lock, flags);
404 bio = bio_list_get(&m->queued_ios); 409 list_splice_init(&m->queued_ios, &cl);
405 spin_unlock_irqrestore(&m->lock, flags); 410 spin_unlock_irqrestore(&m->lock, flags);
406 411
407 while (bio) { 412 list_for_each_entry_safe(clone, n, &cl, queuelist) {
408 next = bio->bi_next; 413 list_del_init(&clone->queuelist);
409 bio->bi_next = NULL;
410 414
411 info = dm_get_mapinfo(bio); 415 info = dm_get_rq_mapinfo(clone);
412 mpio = info->ptr; 416 mpio = info->ptr;
413 417
414 r = map_io(m, bio, mpio, 1); 418 r = map_io(m, clone, mpio, 1);
415 if (r < 0) 419 if (r < 0) {
416 bio_endio(bio, r); 420 mempool_free(mpio, m->mpio_pool);
417 else if (r == DM_MAPIO_REMAPPED) 421 dm_kill_unmapped_request(clone, r);
418 generic_make_request(bio); 422 } else if (r == DM_MAPIO_REMAPPED)
419 else if (r == DM_MAPIO_REQUEUE) 423 dm_dispatch_request(clone);
420 bio_endio(bio, -EIO); 424 else if (r == DM_MAPIO_REQUEUE) {
421 425 mempool_free(mpio, m->mpio_pool);
422 bio = next; 426 dm_requeue_unmapped_request(clone);
427 }
423 } 428 }
424} 429}
425 430
@@ -427,8 +432,8 @@ static void process_queued_ios(struct work_struct *work)
427{ 432{
428 struct multipath *m = 433 struct multipath *m =
429 container_of(work, struct multipath, process_queued_ios); 434 container_of(work, struct multipath, process_queued_ios);
430 struct pgpath *pgpath = NULL; 435 struct pgpath *pgpath = NULL, *tmp;
431 unsigned init_required = 0, must_queue = 1; 436 unsigned must_queue = 1;
432 unsigned long flags; 437 unsigned long flags;
433 438
434 spin_lock_irqsave(&m->lock, flags); 439 spin_lock_irqsave(&m->lock, flags);
@@ -437,7 +442,7 @@ static void process_queued_ios(struct work_struct *work)
437 goto out; 442 goto out;
438 443
439 if (!m->current_pgpath) 444 if (!m->current_pgpath)
440 __choose_pgpath(m); 445 __choose_pgpath(m, 0);
441 446
442 pgpath = m->current_pgpath; 447 pgpath = m->current_pgpath;
443 448
@@ -446,19 +451,15 @@ static void process_queued_ios(struct work_struct *work)
446 must_queue = 0; 451 must_queue = 0;
447 452
448 if (m->pg_init_required && !m->pg_init_in_progress && pgpath) { 453 if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
449 m->pgpath_to_activate = pgpath;
450 m->pg_init_count++; 454 m->pg_init_count++;
451 m->pg_init_required = 0; 455 m->pg_init_required = 0;
452 m->pg_init_in_progress = 1; 456 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
453 init_required = 1; 457 if (queue_work(kmpath_handlerd, &tmp->activate_path))
458 m->pg_init_in_progress++;
459 }
454 } 460 }
455
456out: 461out:
457 spin_unlock_irqrestore(&m->lock, flags); 462 spin_unlock_irqrestore(&m->lock, flags);
458
459 if (init_required)
460 queue_work(kmpath_handlerd, &m->activate_path);
461
462 if (!must_queue) 463 if (!must_queue)
463 dispatch_queued_ios(m); 464 dispatch_queued_ios(m);
464} 465}
@@ -553,6 +554,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
553 return -EINVAL; 554 return -EINVAL;
554 } 555 }
555 556
557 if (ps_argc > as->argc) {
558 dm_put_path_selector(pst);
559 ti->error = "not enough arguments for path selector";
560 return -EINVAL;
561 }
562
556 r = pst->create(&pg->ps, ps_argc, as->argv); 563 r = pst->create(&pg->ps, ps_argc, as->argv);
557 if (r) { 564 if (r) {
558 dm_put_path_selector(pst); 565 dm_put_path_selector(pst);
@@ -591,9 +598,20 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
591 } 598 }
592 599
593 if (m->hw_handler_name) { 600 if (m->hw_handler_name) {
594 r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev), 601 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
595 m->hw_handler_name); 602
603 r = scsi_dh_attach(q, m->hw_handler_name);
604 if (r == -EBUSY) {
605 /*
606 * Already attached to different hw_handler,
607 * try to reattach with correct one.
608 */
609 scsi_dh_detach(q);
610 r = scsi_dh_attach(q, m->hw_handler_name);
611 }
612
596 if (r < 0) { 613 if (r < 0) {
614 ti->error = "error attaching hardware handler";
597 dm_put_device(ti, p->path.dev); 615 dm_put_device(ti, p->path.dev);
598 goto bad; 616 goto bad;
599 } 617 }
@@ -699,6 +717,11 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
699 if (!hw_argc) 717 if (!hw_argc)
700 return 0; 718 return 0;
701 719
720 if (hw_argc > as->argc) {
721 ti->error = "not enough arguments for hardware handler";
722 return -EINVAL;
723 }
724
702 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); 725 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
703 request_module("scsi_dh_%s", m->hw_handler_name); 726 request_module("scsi_dh_%s", m->hw_handler_name);
704 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { 727 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
@@ -823,6 +846,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
823 goto bad; 846 goto bad;
824 } 847 }
825 848
849 ti->num_flush_requests = 1;
850
826 return 0; 851 return 0;
827 852
828 bad: 853 bad:
@@ -836,25 +861,29 @@ static void multipath_dtr(struct dm_target *ti)
836 861
837 flush_workqueue(kmpath_handlerd); 862 flush_workqueue(kmpath_handlerd);
838 flush_workqueue(kmultipathd); 863 flush_workqueue(kmultipathd);
864 flush_scheduled_work();
839 free_multipath(m); 865 free_multipath(m);
840} 866}
841 867
842/* 868/*
843 * Map bios, recording original fields for later in case we have to resubmit 869 * Map cloned requests
844 */ 870 */
845static int multipath_map(struct dm_target *ti, struct bio *bio, 871static int multipath_map(struct dm_target *ti, struct request *clone,
846 union map_info *map_context) 872 union map_info *map_context)
847{ 873{
848 int r; 874 int r;
849 struct dm_mpath_io *mpio; 875 struct dm_mpath_io *mpio;
850 struct multipath *m = (struct multipath *) ti->private; 876 struct multipath *m = (struct multipath *) ti->private;
851 877
852 mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); 878 mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
853 dm_bio_record(&mpio->details, bio); 879 if (!mpio)
880 /* ENOMEM, requeue */
881 return DM_MAPIO_REQUEUE;
882 memset(mpio, 0, sizeof(*mpio));
854 883
855 map_context->ptr = mpio; 884 map_context->ptr = mpio;
856 bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); 885 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
857 r = map_io(m, bio, mpio, 0); 886 r = map_io(m, clone, mpio, 0);
858 if (r < 0 || r == DM_MAPIO_REQUEUE) 887 if (r < 0 || r == DM_MAPIO_REQUEUE)
859 mempool_free(mpio, m->mpio_pool); 888 mempool_free(mpio, m->mpio_pool);
860 889
@@ -924,9 +953,13 @@ static int reinstate_path(struct pgpath *pgpath)
924 953
925 pgpath->is_active = 1; 954 pgpath->is_active = 1;
926 955
927 m->current_pgpath = NULL; 956 if (!m->nr_valid_paths++ && m->queue_size) {
928 if (!m->nr_valid_paths++ && m->queue_size) 957 m->current_pgpath = NULL;
929 queue_work(kmultipathd, &m->process_queued_ios); 958 queue_work(kmultipathd, &m->process_queued_ios);
959 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
960 if (queue_work(kmpath_handlerd, &pgpath->activate_path))
961 m->pg_init_in_progress++;
962 }
930 963
931 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 964 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
932 pgpath->path.dev->name, m->nr_valid_paths); 965 pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,87 +1135,70 @@ static void pg_init_done(struct dm_path *path, int errors)
1102 1135
1103 spin_lock_irqsave(&m->lock, flags); 1136 spin_lock_irqsave(&m->lock, flags);
1104 if (errors) { 1137 if (errors) {
1105 DMERR("Could not failover device. Error %d.", errors); 1138 if (pgpath == m->current_pgpath) {
1106 m->current_pgpath = NULL; 1139 DMERR("Could not failover device. Error %d.", errors);
1107 m->current_pg = NULL; 1140 m->current_pgpath = NULL;
1141 m->current_pg = NULL;
1142 }
1108 } else if (!m->pg_init_required) { 1143 } else if (!m->pg_init_required) {
1109 m->queue_io = 0; 1144 m->queue_io = 0;
1110 pg->bypassed = 0; 1145 pg->bypassed = 0;
1111 } 1146 }
1112 1147
1113 m->pg_init_in_progress = 0; 1148 m->pg_init_in_progress--;
1114 queue_work(kmultipathd, &m->process_queued_ios); 1149 if (!m->pg_init_in_progress)
1150 queue_work(kmultipathd, &m->process_queued_ios);
1115 spin_unlock_irqrestore(&m->lock, flags); 1151 spin_unlock_irqrestore(&m->lock, flags);
1116} 1152}
1117 1153
1118static void activate_path(struct work_struct *work) 1154static void activate_path(struct work_struct *work)
1119{ 1155{
1120 int ret; 1156 int ret;
1121 struct multipath *m = 1157 struct pgpath *pgpath =
1122 container_of(work, struct multipath, activate_path); 1158 container_of(work, struct pgpath, activate_path);
1123 struct dm_path *path;
1124 unsigned long flags;
1125 1159
1126 spin_lock_irqsave(&m->lock, flags); 1160 ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
1127 path = &m->pgpath_to_activate->path; 1161 pg_init_done(&pgpath->path, ret);
1128 m->pgpath_to_activate = NULL;
1129 spin_unlock_irqrestore(&m->lock, flags);
1130 if (!path)
1131 return;
1132 ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
1133 pg_init_done(path, ret);
1134} 1162}
1135 1163
1136/* 1164/*
1137 * end_io handling 1165 * end_io handling
1138 */ 1166 */
1139static int do_end_io(struct multipath *m, struct bio *bio, 1167static int do_end_io(struct multipath *m, struct request *clone,
1140 int error, struct dm_mpath_io *mpio) 1168 int error, struct dm_mpath_io *mpio)
1141{ 1169{
1170 /*
1171 * We don't queue any clone request inside the multipath target
1172 * during end I/O handling, since those clone requests don't have
1173 * bio clones. If we queue them inside the multipath target,
1174 * we need to make bio clones, that requires memory allocation.
1175 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1176 * don't have bio clones.)
1177 * Instead of queueing the clone request here, we queue the original
1178 * request into dm core, which will remake a clone request and
1179 * clone bios for it and resubmit it later.
1180 */
1181 int r = DM_ENDIO_REQUEUE;
1142 unsigned long flags; 1182 unsigned long flags;
1143 1183
1144 if (!error) 1184 if (!error && !clone->errors)
1145 return 0; /* I/O complete */ 1185 return 0; /* I/O complete */
1146 1186
1147 if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
1148 return error;
1149
1150 if (error == -EOPNOTSUPP) 1187 if (error == -EOPNOTSUPP)
1151 return error; 1188 return error;
1152 1189
1153 spin_lock_irqsave(&m->lock, flags);
1154 if (!m->nr_valid_paths) {
1155 if (__must_push_back(m)) {
1156 spin_unlock_irqrestore(&m->lock, flags);
1157 return DM_ENDIO_REQUEUE;
1158 } else if (!m->queue_if_no_path) {
1159 spin_unlock_irqrestore(&m->lock, flags);
1160 return -EIO;
1161 } else {
1162 spin_unlock_irqrestore(&m->lock, flags);
1163 goto requeue;
1164 }
1165 }
1166 spin_unlock_irqrestore(&m->lock, flags);
1167
1168 if (mpio->pgpath) 1190 if (mpio->pgpath)
1169 fail_path(mpio->pgpath); 1191 fail_path(mpio->pgpath);
1170 1192
1171 requeue:
1172 dm_bio_restore(&mpio->details, bio);
1173
1174 /* queue for the daemon to resubmit or fail */
1175 spin_lock_irqsave(&m->lock, flags); 1193 spin_lock_irqsave(&m->lock, flags);
1176 bio_list_add(&m->queued_ios, bio); 1194 if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1177 m->queue_size++; 1195 r = -EIO;
1178 if (!m->queue_io)
1179 queue_work(kmultipathd, &m->process_queued_ios);
1180 spin_unlock_irqrestore(&m->lock, flags); 1196 spin_unlock_irqrestore(&m->lock, flags);
1181 1197
1182 return DM_ENDIO_INCOMPLETE; /* io not complete */ 1198 return r;
1183} 1199}
1184 1200
1185static int multipath_end_io(struct dm_target *ti, struct bio *bio, 1201static int multipath_end_io(struct dm_target *ti, struct request *clone,
1186 int error, union map_info *map_context) 1202 int error, union map_info *map_context)
1187{ 1203{
1188 struct multipath *m = ti->private; 1204 struct multipath *m = ti->private;
@@ -1191,14 +1207,13 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
1191 struct path_selector *ps; 1207 struct path_selector *ps;
1192 int r; 1208 int r;
1193 1209
1194 r = do_end_io(m, bio, error, mpio); 1210 r = do_end_io(m, clone, error, mpio);
1195 if (pgpath) { 1211 if (pgpath) {
1196 ps = &pgpath->pg->ps; 1212 ps = &pgpath->pg->ps;
1197 if (ps->type->end_io) 1213 if (ps->type->end_io)
1198 ps->type->end_io(ps, &pgpath->path); 1214 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1199 } 1215 }
1200 if (r != DM_ENDIO_INCOMPLETE) 1216 mempool_free(mpio, m->mpio_pool);
1201 mempool_free(mpio, m->mpio_pool);
1202 1217
1203 return r; 1218 return r;
1204} 1219}
@@ -1411,7 +1426,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1411 spin_lock_irqsave(&m->lock, flags); 1426 spin_lock_irqsave(&m->lock, flags);
1412 1427
1413 if (!m->current_pgpath) 1428 if (!m->current_pgpath)
1414 __choose_pgpath(m); 1429 __choose_pgpath(m, 0);
1415 1430
1416 if (m->current_pgpath) { 1431 if (m->current_pgpath) {
1417 bdev = m->current_pgpath->path.dev->bdev; 1432 bdev = m->current_pgpath->path.dev->bdev;
@@ -1428,22 +1443,113 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1428 return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); 1443 return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1429} 1444}
1430 1445
1446static int multipath_iterate_devices(struct dm_target *ti,
1447 iterate_devices_callout_fn fn, void *data)
1448{
1449 struct multipath *m = ti->private;
1450 struct priority_group *pg;
1451 struct pgpath *p;
1452 int ret = 0;
1453
1454 list_for_each_entry(pg, &m->priority_groups, list) {
1455 list_for_each_entry(p, &pg->pgpaths, list) {
1456 ret = fn(ti, p->path.dev, ti->begin, data);
1457 if (ret)
1458 goto out;
1459 }
1460 }
1461
1462out:
1463 return ret;
1464}
1465
1466static int __pgpath_busy(struct pgpath *pgpath)
1467{
1468 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1469
1470 return dm_underlying_device_busy(q);
1471}
1472
1473/*
1474 * We return "busy", only when we can map I/Os but underlying devices
1475 * are busy (so even if we map I/Os now, the I/Os will wait on
1476 * the underlying queue).
1477 * In other words, if we want to kill I/Os or queue them inside us
1478 * due to map unavailability, we don't return "busy". Otherwise,
1479 * dm core won't give us the I/Os and we can't do what we want.
1480 */
1481static int multipath_busy(struct dm_target *ti)
1482{
1483 int busy = 0, has_active = 0;
1484 struct multipath *m = ti->private;
1485 struct priority_group *pg;
1486 struct pgpath *pgpath;
1487 unsigned long flags;
1488
1489 spin_lock_irqsave(&m->lock, flags);
1490
1491 /* Guess which priority_group will be used at next mapping time */
1492 if (unlikely(!m->current_pgpath && m->next_pg))
1493 pg = m->next_pg;
1494 else if (likely(m->current_pg))
1495 pg = m->current_pg;
1496 else
1497 /*
1498 * We don't know which pg will be used at next mapping time.
1499 * We don't call __choose_pgpath() here to avoid to trigger
1500 * pg_init just by busy checking.
1501 * So we don't know whether underlying devices we will be using
1502 * at next mapping time are busy or not. Just try mapping.
1503 */
1504 goto out;
1505
1506 /*
1507 * If there is one non-busy active path at least, the path selector
1508 * will be able to select it. So we consider such a pg as not busy.
1509 */
1510 busy = 1;
1511 list_for_each_entry(pgpath, &pg->pgpaths, list)
1512 if (pgpath->is_active) {
1513 has_active = 1;
1514
1515 if (!__pgpath_busy(pgpath)) {
1516 busy = 0;
1517 break;
1518 }
1519 }
1520
1521 if (!has_active)
1522 /*
1523 * No active path in this pg, so this pg won't be used and
1524 * the current_pg will be changed at next mapping time.
1525 * We need to try mapping to determine it.
1526 */
1527 busy = 0;
1528
1529out:
1530 spin_unlock_irqrestore(&m->lock, flags);
1531
1532 return busy;
1533}
1534
1431/*----------------------------------------------------------------- 1535/*-----------------------------------------------------------------
1432 * Module setup 1536 * Module setup
1433 *---------------------------------------------------------------*/ 1537 *---------------------------------------------------------------*/
1434static struct target_type multipath_target = { 1538static struct target_type multipath_target = {
1435 .name = "multipath", 1539 .name = "multipath",
1436 .version = {1, 0, 5}, 1540 .version = {1, 1, 0},
1437 .module = THIS_MODULE, 1541 .module = THIS_MODULE,
1438 .ctr = multipath_ctr, 1542 .ctr = multipath_ctr,
1439 .dtr = multipath_dtr, 1543 .dtr = multipath_dtr,
1440 .map = multipath_map, 1544 .map_rq = multipath_map,
1441 .end_io = multipath_end_io, 1545 .rq_end_io = multipath_end_io,
1442 .presuspend = multipath_presuspend, 1546 .presuspend = multipath_presuspend,
1443 .resume = multipath_resume, 1547 .resume = multipath_resume,
1444 .status = multipath_status, 1548 .status = multipath_status,
1445 .message = multipath_message, 1549 .message = multipath_message,
1446 .ioctl = multipath_ioctl, 1550 .ioctl = multipath_ioctl,
1551 .iterate_devices = multipath_iterate_devices,
1552 .busy = multipath_busy,
1447}; 1553};
1448 1554
1449static int __init dm_multipath_init(void) 1555static int __init dm_multipath_init(void)