diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-mpath.c | 193 |
1 files changed, 128 insertions, 65 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f8aeaaa54afe..c70604a20897 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/device-mapper.h> | 8 | #include <linux/device-mapper.h> |
9 | 9 | ||
10 | #include "dm-path-selector.h" | 10 | #include "dm-path-selector.h" |
11 | #include "dm-bio-record.h" | ||
12 | #include "dm-uevent.h" | 11 | #include "dm-uevent.h" |
13 | 12 | ||
14 | #include <linux/ctype.h> | 13 | #include <linux/ctype.h> |
@@ -83,7 +82,7 @@ struct multipath { | |||
83 | unsigned pg_init_count; /* Number of times pg_init called */ | 82 | unsigned pg_init_count; /* Number of times pg_init called */ |
84 | 83 | ||
85 | struct work_struct process_queued_ios; | 84 | struct work_struct process_queued_ios; |
86 | struct bio_list queued_ios; | 85 | struct list_head queued_ios; |
87 | unsigned queue_size; | 86 | unsigned queue_size; |
88 | 87 | ||
89 | struct work_struct trigger_event; | 88 | struct work_struct trigger_event; |
@@ -100,7 +99,6 @@ struct multipath { | |||
100 | */ | 99 | */ |
101 | struct dm_mpath_io { | 100 | struct dm_mpath_io { |
102 | struct pgpath *pgpath; | 101 | struct pgpath *pgpath; |
103 | struct dm_bio_details details; | ||
104 | size_t nr_bytes; | 102 | size_t nr_bytes; |
105 | }; | 103 | }; |
106 | 104 | ||
@@ -194,6 +192,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) | |||
194 | m = kzalloc(sizeof(*m), GFP_KERNEL); | 192 | m = kzalloc(sizeof(*m), GFP_KERNEL); |
195 | if (m) { | 193 | if (m) { |
196 | INIT_LIST_HEAD(&m->priority_groups); | 194 | INIT_LIST_HEAD(&m->priority_groups); |
195 | INIT_LIST_HEAD(&m->queued_ios); | ||
197 | spin_lock_init(&m->lock); | 196 | spin_lock_init(&m->lock); |
198 | m->queue_io = 1; | 197 | m->queue_io = 1; |
199 | INIT_WORK(&m->process_queued_ios, process_queued_ios); | 198 | INIT_WORK(&m->process_queued_ios, process_queued_ios); |
@@ -318,13 +317,14 @@ static int __must_push_back(struct multipath *m) | |||
318 | dm_noflush_suspending(m->ti)); | 317 | dm_noflush_suspending(m->ti)); |
319 | } | 318 | } |
320 | 319 | ||
321 | static int map_io(struct multipath *m, struct bio *bio, | 320 | static int map_io(struct multipath *m, struct request *clone, |
322 | struct dm_mpath_io *mpio, unsigned was_queued) | 321 | struct dm_mpath_io *mpio, unsigned was_queued) |
323 | { | 322 | { |
324 | int r = DM_MAPIO_REMAPPED; | 323 | int r = DM_MAPIO_REMAPPED; |
325 | size_t nr_bytes = bio->bi_size; | 324 | size_t nr_bytes = blk_rq_bytes(clone); |
326 | unsigned long flags; | 325 | unsigned long flags; |
327 | struct pgpath *pgpath; | 326 | struct pgpath *pgpath; |
327 | struct block_device *bdev; | ||
328 | 328 | ||
329 | spin_lock_irqsave(&m->lock, flags); | 329 | spin_lock_irqsave(&m->lock, flags); |
330 | 330 | ||
@@ -341,16 +341,18 @@ static int map_io(struct multipath *m, struct bio *bio, | |||
341 | if ((pgpath && m->queue_io) || | 341 | if ((pgpath && m->queue_io) || |
342 | (!pgpath && m->queue_if_no_path)) { | 342 | (!pgpath && m->queue_if_no_path)) { |
343 | /* Queue for the daemon to resubmit */ | 343 | /* Queue for the daemon to resubmit */ |
344 | bio_list_add(&m->queued_ios, bio); | 344 | list_add_tail(&clone->queuelist, &m->queued_ios); |
345 | m->queue_size++; | 345 | m->queue_size++; |
346 | if ((m->pg_init_required && !m->pg_init_in_progress) || | 346 | if ((m->pg_init_required && !m->pg_init_in_progress) || |
347 | !m->queue_io) | 347 | !m->queue_io) |
348 | queue_work(kmultipathd, &m->process_queued_ios); | 348 | queue_work(kmultipathd, &m->process_queued_ios); |
349 | pgpath = NULL; | 349 | pgpath = NULL; |
350 | r = DM_MAPIO_SUBMITTED; | 350 | r = DM_MAPIO_SUBMITTED; |
351 | } else if (pgpath) | 351 | } else if (pgpath) { |
352 | bio->bi_bdev = pgpath->path.dev->bdev; | 352 | bdev = pgpath->path.dev->bdev; |
353 | else if (__must_push_back(m)) | 353 | clone->q = bdev_get_queue(bdev); |
354 | clone->rq_disk = bdev->bd_disk; | ||
355 | } else if (__must_push_back(m)) | ||
354 | r = DM_MAPIO_REQUEUE; | 356 | r = DM_MAPIO_REQUEUE; |
355 | else | 357 | else |
356 | r = -EIO; /* Failed */ | 358 | r = -EIO; /* Failed */ |
@@ -398,30 +400,31 @@ static void dispatch_queued_ios(struct multipath *m) | |||
398 | { | 400 | { |
399 | int r; | 401 | int r; |
400 | unsigned long flags; | 402 | unsigned long flags; |
401 | struct bio *bio = NULL, *next; | ||
402 | struct dm_mpath_io *mpio; | 403 | struct dm_mpath_io *mpio; |
403 | union map_info *info; | 404 | union map_info *info; |
405 | struct request *clone, *n; | ||
406 | LIST_HEAD(cl); | ||
404 | 407 | ||
405 | spin_lock_irqsave(&m->lock, flags); | 408 | spin_lock_irqsave(&m->lock, flags); |
406 | bio = bio_list_get(&m->queued_ios); | 409 | list_splice_init(&m->queued_ios, &cl); |
407 | spin_unlock_irqrestore(&m->lock, flags); | 410 | spin_unlock_irqrestore(&m->lock, flags); |
408 | 411 | ||
409 | while (bio) { | 412 | list_for_each_entry_safe(clone, n, &cl, queuelist) { |
410 | next = bio->bi_next; | 413 | list_del_init(&clone->queuelist); |
411 | bio->bi_next = NULL; | ||
412 | 414 | ||
413 | info = dm_get_mapinfo(bio); | 415 | info = dm_get_rq_mapinfo(clone); |
414 | mpio = info->ptr; | 416 | mpio = info->ptr; |
415 | 417 | ||
416 | r = map_io(m, bio, mpio, 1); | 418 | r = map_io(m, clone, mpio, 1); |
417 | if (r < 0) | 419 | if (r < 0) { |
418 | bio_endio(bio, r); | 420 | mempool_free(mpio, m->mpio_pool); |
419 | else if (r == DM_MAPIO_REMAPPED) | 421 | dm_kill_unmapped_request(clone, r); |
420 | generic_make_request(bio); | 422 | } else if (r == DM_MAPIO_REMAPPED) |
421 | else if (r == DM_MAPIO_REQUEUE) | 423 | dm_dispatch_request(clone); |
422 | bio_endio(bio, -EIO); | 424 | else if (r == DM_MAPIO_REQUEUE) { |
423 | 425 | mempool_free(mpio, m->mpio_pool); | |
424 | bio = next; | 426 | dm_requeue_unmapped_request(clone); |
427 | } | ||
425 | } | 428 | } |
426 | } | 429 | } |
427 | 430 | ||
@@ -863,21 +866,24 @@ static void multipath_dtr(struct dm_target *ti) | |||
863 | } | 866 | } |
864 | 867 | ||
865 | /* | 868 | /* |
866 | * Map bios, recording original fields for later in case we have to resubmit | 869 | * Map cloned requests |
867 | */ | 870 | */ |
868 | static int multipath_map(struct dm_target *ti, struct bio *bio, | 871 | static int multipath_map(struct dm_target *ti, struct request *clone, |
869 | union map_info *map_context) | 872 | union map_info *map_context) |
870 | { | 873 | { |
871 | int r; | 874 | int r; |
872 | struct dm_mpath_io *mpio; | 875 | struct dm_mpath_io *mpio; |
873 | struct multipath *m = (struct multipath *) ti->private; | 876 | struct multipath *m = (struct multipath *) ti->private; |
874 | 877 | ||
875 | mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); | 878 | mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); |
876 | dm_bio_record(&mpio->details, bio); | 879 | if (!mpio) |
880 | /* ENOMEM, requeue */ | ||
881 | return DM_MAPIO_REQUEUE; | ||
882 | memset(mpio, 0, sizeof(*mpio)); | ||
877 | 883 | ||
878 | map_context->ptr = mpio; | 884 | map_context->ptr = mpio; |
879 | bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); | 885 | clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; |
880 | r = map_io(m, bio, mpio, 0); | 886 | r = map_io(m, clone, mpio, 0); |
881 | if (r < 0 || r == DM_MAPIO_REQUEUE) | 887 | if (r < 0 || r == DM_MAPIO_REQUEUE) |
882 | mempool_free(mpio, m->mpio_pool); | 888 | mempool_free(mpio, m->mpio_pool); |
883 | 889 | ||
@@ -1158,53 +1164,41 @@ static void activate_path(struct work_struct *work) | |||
1158 | /* | 1164 | /* |
1159 | * end_io handling | 1165 | * end_io handling |
1160 | */ | 1166 | */ |
1161 | static int do_end_io(struct multipath *m, struct bio *bio, | 1167 | static int do_end_io(struct multipath *m, struct request *clone, |
1162 | int error, struct dm_mpath_io *mpio) | 1168 | int error, struct dm_mpath_io *mpio) |
1163 | { | 1169 | { |
1170 | /* | ||
1171 | * We don't queue any clone request inside the multipath target | ||
1172 | * during end I/O handling, since those clone requests don't have | ||
1173 | * bio clones. If we queue them inside the multipath target, | ||
1174 | * we need to make bio clones, that requires memory allocation. | ||
1175 | * (See drivers/md/dm.c:end_clone_bio() about why the clone requests | ||
1176 | * don't have bio clones.) | ||
1177 | * Instead of queueing the clone request here, we queue the original | ||
1178 | * request into dm core, which will remake a clone request and | ||
1179 | * clone bios for it and resubmit it later. | ||
1180 | */ | ||
1181 | int r = DM_ENDIO_REQUEUE; | ||
1164 | unsigned long flags; | 1182 | unsigned long flags; |
1165 | 1183 | ||
1166 | if (!error) | 1184 | if (!error && !clone->errors) |
1167 | return 0; /* I/O complete */ | 1185 | return 0; /* I/O complete */ |
1168 | 1186 | ||
1169 | if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio)) | ||
1170 | return error; | ||
1171 | |||
1172 | if (error == -EOPNOTSUPP) | 1187 | if (error == -EOPNOTSUPP) |
1173 | return error; | 1188 | return error; |
1174 | 1189 | ||
1175 | spin_lock_irqsave(&m->lock, flags); | ||
1176 | if (!m->nr_valid_paths) { | ||
1177 | if (__must_push_back(m)) { | ||
1178 | spin_unlock_irqrestore(&m->lock, flags); | ||
1179 | return DM_ENDIO_REQUEUE; | ||
1180 | } else if (!m->queue_if_no_path) { | ||
1181 | spin_unlock_irqrestore(&m->lock, flags); | ||
1182 | return -EIO; | ||
1183 | } else { | ||
1184 | spin_unlock_irqrestore(&m->lock, flags); | ||
1185 | goto requeue; | ||
1186 | } | ||
1187 | } | ||
1188 | spin_unlock_irqrestore(&m->lock, flags); | ||
1189 | |||
1190 | if (mpio->pgpath) | 1190 | if (mpio->pgpath) |
1191 | fail_path(mpio->pgpath); | 1191 | fail_path(mpio->pgpath); |
1192 | 1192 | ||
1193 | requeue: | ||
1194 | dm_bio_restore(&mpio->details, bio); | ||
1195 | |||
1196 | /* queue for the daemon to resubmit or fail */ | ||
1197 | spin_lock_irqsave(&m->lock, flags); | 1193 | spin_lock_irqsave(&m->lock, flags); |
1198 | bio_list_add(&m->queued_ios, bio); | 1194 | if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m)) |
1199 | m->queue_size++; | 1195 | r = -EIO; |
1200 | if (!m->queue_io) | ||
1201 | queue_work(kmultipathd, &m->process_queued_ios); | ||
1202 | spin_unlock_irqrestore(&m->lock, flags); | 1196 | spin_unlock_irqrestore(&m->lock, flags); |
1203 | 1197 | ||
1204 | return DM_ENDIO_INCOMPLETE; /* io not complete */ | 1198 | return r; |
1205 | } | 1199 | } |
1206 | 1200 | ||
1207 | static int multipath_end_io(struct dm_target *ti, struct bio *bio, | 1201 | static int multipath_end_io(struct dm_target *ti, struct request *clone, |
1208 | int error, union map_info *map_context) | 1202 | int error, union map_info *map_context) |
1209 | { | 1203 | { |
1210 | struct multipath *m = ti->private; | 1204 | struct multipath *m = ti->private; |
@@ -1213,14 +1207,13 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio, | |||
1213 | struct path_selector *ps; | 1207 | struct path_selector *ps; |
1214 | int r; | 1208 | int r; |
1215 | 1209 | ||
1216 | r = do_end_io(m, bio, error, mpio); | 1210 | r = do_end_io(m, clone, error, mpio); |
1217 | if (pgpath) { | 1211 | if (pgpath) { |
1218 | ps = &pgpath->pg->ps; | 1212 | ps = &pgpath->pg->ps; |
1219 | if (ps->type->end_io) | 1213 | if (ps->type->end_io) |
1220 | ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); | 1214 | ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); |
1221 | } | 1215 | } |
1222 | if (r != DM_ENDIO_INCOMPLETE) | 1216 | mempool_free(mpio, m->mpio_pool); |
1223 | mempool_free(mpio, m->mpio_pool); | ||
1224 | 1217 | ||
1225 | return r; | 1218 | return r; |
1226 | } | 1219 | } |
@@ -1470,6 +1463,75 @@ out: | |||
1470 | return ret; | 1463 | return ret; |
1471 | } | 1464 | } |
1472 | 1465 | ||
1466 | static int __pgpath_busy(struct pgpath *pgpath) | ||
1467 | { | ||
1468 | struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); | ||
1469 | |||
1470 | return dm_underlying_device_busy(q); | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * We return "busy", only when we can map I/Os but underlying devices | ||
1475 | * are busy (so even if we map I/Os now, the I/Os will wait on | ||
1476 | * the underlying queue). | ||
1477 | * In other words, if we want to kill I/Os or queue them inside us | ||
1478 | * due to map unavailability, we don't return "busy". Otherwise, | ||
1479 | * dm core won't give us the I/Os and we can't do what we want. | ||
1480 | */ | ||
1481 | static int multipath_busy(struct dm_target *ti) | ||
1482 | { | ||
1483 | int busy = 0, has_active = 0; | ||
1484 | struct multipath *m = ti->private; | ||
1485 | struct priority_group *pg; | ||
1486 | struct pgpath *pgpath; | ||
1487 | unsigned long flags; | ||
1488 | |||
1489 | spin_lock_irqsave(&m->lock, flags); | ||
1490 | |||
1491 | /* Guess which priority_group will be used at next mapping time */ | ||
1492 | if (unlikely(!m->current_pgpath && m->next_pg)) | ||
1493 | pg = m->next_pg; | ||
1494 | else if (likely(m->current_pg)) | ||
1495 | pg = m->current_pg; | ||
1496 | else | ||
1497 | /* | ||
1498 | * We don't know which pg will be used at next mapping time. | ||
1499 | * We don't call __choose_pgpath() here to avoid to trigger | ||
1500 | * pg_init just by busy checking. | ||
1501 | * So we don't know whether underlying devices we will be using | ||
1502 | * at next mapping time are busy or not. Just try mapping. | ||
1503 | */ | ||
1504 | goto out; | ||
1505 | |||
1506 | /* | ||
1507 | * If there is one non-busy active path at least, the path selector | ||
1508 | * will be able to select it. So we consider such a pg as not busy. | ||
1509 | */ | ||
1510 | busy = 1; | ||
1511 | list_for_each_entry(pgpath, &pg->pgpaths, list) | ||
1512 | if (pgpath->is_active) { | ||
1513 | has_active = 1; | ||
1514 | |||
1515 | if (!__pgpath_busy(pgpath)) { | ||
1516 | busy = 0; | ||
1517 | break; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | if (!has_active) | ||
1522 | /* | ||
1523 | * No active path in this pg, so this pg won't be used and | ||
1524 | * the current_pg will be changed at next mapping time. | ||
1525 | * We need to try mapping to determine it. | ||
1526 | */ | ||
1527 | busy = 0; | ||
1528 | |||
1529 | out: | ||
1530 | spin_unlock_irqrestore(&m->lock, flags); | ||
1531 | |||
1532 | return busy; | ||
1533 | } | ||
1534 | |||
1473 | /*----------------------------------------------------------------- | 1535 | /*----------------------------------------------------------------- |
1474 | * Module setup | 1536 | * Module setup |
1475 | *---------------------------------------------------------------*/ | 1537 | *---------------------------------------------------------------*/ |
@@ -1479,14 +1541,15 @@ static struct target_type multipath_target = { | |||
1479 | .module = THIS_MODULE, | 1541 | .module = THIS_MODULE, |
1480 | .ctr = multipath_ctr, | 1542 | .ctr = multipath_ctr, |
1481 | .dtr = multipath_dtr, | 1543 | .dtr = multipath_dtr, |
1482 | .map = multipath_map, | 1544 | .map_rq = multipath_map, |
1483 | .end_io = multipath_end_io, | 1545 | .rq_end_io = multipath_end_io, |
1484 | .presuspend = multipath_presuspend, | 1546 | .presuspend = multipath_presuspend, |
1485 | .resume = multipath_resume, | 1547 | .resume = multipath_resume, |
1486 | .status = multipath_status, | 1548 | .status = multipath_status, |
1487 | .message = multipath_message, | 1549 | .message = multipath_message, |
1488 | .ioctl = multipath_ioctl, | 1550 | .ioctl = multipath_ioctl, |
1489 | .iterate_devices = multipath_iterate_devices, | 1551 | .iterate_devices = multipath_iterate_devices, |
1552 | .busy = multipath_busy, | ||
1490 | }; | 1553 | }; |
1491 | 1554 | ||
1492 | static int __init dm_multipath_init(void) | 1555 | static int __init dm_multipath_init(void) |