 drivers/md/dm-mpath.c | 193 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 128 insertions(+), 65 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f8aeaaa54afe..c70604a20897 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -8,7 +8,6 @@
 #include <linux/device-mapper.h>
 
 #include "dm-path-selector.h"
-#include "dm-bio-record.h"
 #include "dm-uevent.h"
 
 #include <linux/ctype.h>
@@ -83,7 +82,7 @@ struct multipath {
 	unsigned pg_init_count;		/* Number of times pg_init called */
 
 	struct work_struct process_queued_ios;
-	struct bio_list queued_ios;
+	struct list_head queued_ios;
 	unsigned queue_size;
 
 	struct work_struct trigger_event;
@@ -100,7 +99,6 @@ struct multipath {
  */
 struct dm_mpath_io {
 	struct pgpath *pgpath;
-	struct dm_bio_details details;
 	size_t nr_bytes;
 };
 
@@ -194,6 +192,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 	m = kzalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
 		INIT_LIST_HEAD(&m->priority_groups);
+		INIT_LIST_HEAD(&m->queued_ios);
 		spin_lock_init(&m->lock);
 		m->queue_io = 1;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -318,13 +317,14 @@ static int __must_push_back(struct multipath *m)
 		dm_noflush_suspending(m->ti));
 }
 
-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
 		  struct dm_mpath_io *mpio, unsigned was_queued)
 {
 	int r = DM_MAPIO_REMAPPED;
-	size_t nr_bytes = bio->bi_size;
+	size_t nr_bytes = blk_rq_bytes(clone);
 	unsigned long flags;
 	struct pgpath *pgpath;
+	struct block_device *bdev;
 
 	spin_lock_irqsave(&m->lock, flags);
 
@@ -341,16 +341,18 @@ static int map_io(struct multipath *m, struct bio *bio,
 	if ((pgpath && m->queue_io) ||
 	    (!pgpath && m->queue_if_no_path)) {
 		/* Queue for the daemon to resubmit */
-		bio_list_add(&m->queued_ios, bio);
+		list_add_tail(&clone->queuelist, &m->queued_ios);
 		m->queue_size++;
 		if ((m->pg_init_required && !m->pg_init_in_progress) ||
 		    !m->queue_io)
 			queue_work(kmultipathd, &m->process_queued_ios);
 		pgpath = NULL;
 		r = DM_MAPIO_SUBMITTED;
-	} else if (pgpath)
-		bio->bi_bdev = pgpath->path.dev->bdev;
-	else if (__must_push_back(m))
+	} else if (pgpath) {
+		bdev = pgpath->path.dev->bdev;
+		clone->q = bdev_get_queue(bdev);
+		clone->rq_disk = bdev->bd_disk;
+	} else if (__must_push_back(m))
 		r = DM_MAPIO_REQUEUE;
 	else
 		r = -EIO;	/* Failed */
@@ -398,30 +400,31 @@ static void dispatch_queued_ios(struct multipath *m)
 {
 	int r;
 	unsigned long flags;
-	struct bio *bio = NULL, *next;
 	struct dm_mpath_io *mpio;
 	union map_info *info;
+	struct request *clone, *n;
+	LIST_HEAD(cl);
 
 	spin_lock_irqsave(&m->lock, flags);
-	bio = bio_list_get(&m->queued_ios);
+	list_splice_init(&m->queued_ios, &cl);
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	while (bio) {
-		next = bio->bi_next;
-		bio->bi_next = NULL;
+	list_for_each_entry_safe(clone, n, &cl, queuelist) {
+		list_del_init(&clone->queuelist);
 
-		info = dm_get_mapinfo(bio);
+		info = dm_get_rq_mapinfo(clone);
 		mpio = info->ptr;
 
-		r = map_io(m, bio, mpio, 1);
-		if (r < 0)
-			bio_endio(bio, r);
-		else if (r == DM_MAPIO_REMAPPED)
-			generic_make_request(bio);
-		else if (r == DM_MAPIO_REQUEUE)
-			bio_endio(bio, -EIO);
-
-		bio = next;
+		r = map_io(m, clone, mpio, 1);
+		if (r < 0) {
+			mempool_free(mpio, m->mpio_pool);
+			dm_kill_unmapped_request(clone, r);
+		} else if (r == DM_MAPIO_REMAPPED)
+			dm_dispatch_request(clone);
+		else if (r == DM_MAPIO_REQUEUE) {
+			mempool_free(mpio, m->mpio_pool);
+			dm_requeue_unmapped_request(clone);
+		}
 	}
 }
 
@@ -863,21 +866,24 @@ static void multipath_dtr(struct dm_target *ti)
 }
 
 /*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
  */
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
 			 union map_info *map_context)
 {
 	int r;
 	struct dm_mpath_io *mpio;
 	struct multipath *m = (struct multipath *) ti->private;
 
-	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
-	dm_bio_record(&mpio->details, bio);
+	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+	if (!mpio)
+		/* ENOMEM, requeue */
+		return DM_MAPIO_REQUEUE;
+	memset(mpio, 0, sizeof(*mpio));
 
 	map_context->ptr = mpio;
-	bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
-	r = map_io(m, bio, mpio, 0);
+	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	r = map_io(m, clone, mpio, 0);
 	if (r < 0 || r == DM_MAPIO_REQUEUE)
 		mempool_free(mpio, m->mpio_pool);
 
@@ -1158,53 +1164,41 @@ static void activate_path(struct work_struct *work)
 /*
  * end_io handling
  */
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
 		     int error, struct dm_mpath_io *mpio)
 {
+	/*
+	 * We don't queue any clone request inside the multipath target
+	 * during end I/O handling, since those clone requests don't have
+	 * bio clones. If we queue them inside the multipath target,
+	 * we need to make bio clones, that requires memory allocation.
+	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+	 *  don't have bio clones.)
+	 * Instead of queueing the clone request here, we queue the original
+	 * request into dm core, which will remake a clone request and
+	 * clone bios for it and resubmit it later.
+	 */
+	int r = DM_ENDIO_REQUEUE;
 	unsigned long flags;
 
-	if (!error)
+	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
-		return error;
-
 	if (error == -EOPNOTSUPP)
 		return error;
 
-	spin_lock_irqsave(&m->lock, flags);
-	if (!m->nr_valid_paths) {
-		if (__must_push_back(m)) {
-			spin_unlock_irqrestore(&m->lock, flags);
-			return DM_ENDIO_REQUEUE;
-		} else if (!m->queue_if_no_path) {
-			spin_unlock_irqrestore(&m->lock, flags);
-			return -EIO;
-		} else {
-			spin_unlock_irqrestore(&m->lock, flags);
-			goto requeue;
-		}
-	}
-	spin_unlock_irqrestore(&m->lock, flags);
-
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);
 
- requeue:
-	dm_bio_restore(&mpio->details, bio);
-
-	/* queue for the daemon to resubmit or fail */
 	spin_lock_irqsave(&m->lock, flags);
-	bio_list_add(&m->queued_ios, bio);
-	m->queue_size++;
-	if (!m->queue_io)
-		queue_work(kmultipathd, &m->process_queued_ios);
+	if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+		r = -EIO;
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	return DM_ENDIO_INCOMPLETE;	/* io not complete */
+	return r;
 }
 
-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
 			    int error, union map_info *map_context)
 {
 	struct multipath *m = ti->private;
@@ -1213,14 +1207,13 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
 	struct path_selector *ps;
 	int r;
 
-	r = do_end_io(m, bio, error, mpio);
+	r = do_end_io(m, clone, error, mpio);
 	if (pgpath) {
 		ps = &pgpath->pg->ps;
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
-	if (r != DM_ENDIO_INCOMPLETE)
-		mempool_free(mpio, m->mpio_pool);
+	mempool_free(mpio, m->mpio_pool);
 
 	return r;
 }
@@ -1470,6 +1463,75 @@ out:
 	return ret;
 }
 
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+	return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+	int busy = 0, has_active = 0;
+	struct multipath *m = ti->private;
+	struct priority_group *pg;
+	struct pgpath *pgpath;
+	unsigned long flags;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	/* Guess which priority_group will be used at next mapping time */
+	if (unlikely(!m->current_pgpath && m->next_pg))
+		pg = m->next_pg;
+	else if (likely(m->current_pg))
+		pg = m->current_pg;
+	else
+		/*
+		 * We don't know which pg will be used at next mapping time.
+		 * We don't call __choose_pgpath() here to avoid to trigger
+		 * pg_init just by busy checking.
+		 * So we don't know whether underlying devices we will be using
+		 * at next mapping time are busy or not. Just try mapping.
+		 */
+		goto out;
+
+	/*
+	 * If there is one non-busy active path at least, the path selector
+	 * will be able to select it. So we consider such a pg as not busy.
+	 */
+	busy = 1;
+	list_for_each_entry(pgpath, &pg->pgpaths, list)
+		if (pgpath->is_active) {
+			has_active = 1;
+
+			if (!__pgpath_busy(pgpath)) {
+				busy = 0;
+				break;
+			}
+		}
+
+	if (!has_active)
+		/*
+		 * No active path in this pg, so this pg won't be used and
+		 * the current_pg will be changed at next mapping time.
+		 * We need to try mapping to determine it.
+		 */
+		busy = 0;
+
+out:
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return busy;
+}
+
 /*-----------------------------------------------------------------
  * Module setup
  *---------------------------------------------------------------*/
@@ -1479,14 +1541,15 @@ static struct target_type multipath_target = {
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
-	.map = multipath_map,
-	.end_io = multipath_end_io,
+	.map_rq = multipath_map,
+	.rq_end_io = multipath_end_io,
 	.presuspend = multipath_presuspend,
 	.resume = multipath_resume,
 	.status = multipath_status,
 	.message = multipath_message,
 	.ioctl = multipath_ioctl,
 	.iterate_devices = multipath_iterate_devices,
+	.busy = multipath_busy,
 };
 
 static int __init dm_multipath_init(void)
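
The following is not part of the commit above. It is a minimal, hedged sketch of what a request-based device-mapper target looks like in this kernel era, wiring the same target_type hooks the patch switches multipath to (.map_rq, .rq_end_io and the new .busy). The "example-rq" target name and the trivial hook bodies are hypothetical placeholders; only the hook signatures and dm_register_target()/dm_unregister_target() are taken from the device-mapper API of the time.

/*
 * Illustrative skeleton only -- not from dm-mpath.c.
 */
#include <linux/module.h>
#include <linux/device-mapper.h>

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/* A real target would parse argv and grab devices with dm_get_device(). */
	return 0;
}

static void example_dtr(struct dm_target *ti)
{
	/* Release whatever the constructor acquired. */
}

/* Called with a clone of the original request; decide where it should go. */
static int example_map_rq(struct dm_target *ti, struct request *clone,
			  union map_info *map_context)
{
	/*
	 * A real target sets clone->q to the queue of the chosen underlying
	 * device and returns DM_MAPIO_REMAPPED, or DM_MAPIO_REQUEUE to have
	 * dm core retry the original request later.  This stub just fails.
	 */
	return -EIO;
}

/* Completion of the clone; return the error, DM_ENDIO_REQUEUE, etc. */
static int example_rq_end_io(struct dm_target *ti, struct request *clone,
			     int error, union map_info *map_context)
{
	return error;
}

/* New with request-based dm: report whether mapping now would only block. */
static int example_busy(struct dm_target *ti)
{
	return 0;
}

static struct target_type example_target = {
	.name = "example-rq",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = example_ctr,
	.dtr = example_dtr,
	.map_rq = example_map_rq,
	.rq_end_io = example_rq_end_io,
	.busy = example_busy,
};

static int __init example_init(void)
{
	return dm_register_target(&example_target);
}

static void __exit example_exit(void)
{
	dm_unregister_target(&example_target);
}

module_init(example_init);
module_exit(example_exit);
MODULE_DESCRIPTION("Hypothetical request-based dm target skeleton");
MODULE_LICENSE("GPL");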