Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--   drivers/md/dm.c | 238
1 file changed, 149 insertions, 89 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f2d24eb3208c..6617ce4af095 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -71,9 +71,22 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * Work processed by per-device workqueue.
+ */
+struct dm_wq_req {
+	enum {
+		DM_WQ_FLUSH_ALL,
+		DM_WQ_FLUSH_DEFERRED,
+	} type;
+	struct work_struct work;
+	struct mapped_device *md;
+	void *context;
+};
+
 struct mapped_device {
 	struct rw_semaphore io_lock;
-	struct semaphore suspend_lock;
+	struct mutex suspend_lock;
 	spinlock_t pushback_lock;
 	rwlock_t map_lock;
 	atomic_t holders;
@@ -96,6 +109,11 @@ struct mapped_device {
 	struct bio_list pushback;
 
 	/*
+	 * Processing queue (flush/barriers)
+	 */
+	struct workqueue_struct *wq;
+
+	/*
 	 * The current mapping.
 	 */
 	struct dm_table *map;
@@ -181,7 +199,7 @@ static void local_exit(void)
 	DMINFO("cleaned up");
 }
 
-int (*_inits[])(void) __initdata = {
+static int (*_inits[])(void) __initdata = {
 	local_init,
 	dm_target_init,
 	dm_linear_init,
@@ -189,7 +207,7 @@ int (*_inits[])(void) __initdata = {
 	dm_interface_init,
 };
 
-void (*_exits[])(void) = {
+static void (*_exits[])(void) = {
 	local_exit,
 	dm_target_exit,
 	dm_linear_exit,
@@ -982,7 +1000,7 @@ static struct mapped_device *alloc_dev(int minor)
 	}
 
 	if (!try_module_get(THIS_MODULE))
-		goto bad0;
+		goto bad_module_get;
 
 	/* get a minor number for the dev */
 	if (minor == DM_ANY_MINOR)
@@ -990,11 +1008,11 @@ static struct mapped_device *alloc_dev(int minor)
 	else
 		r = specific_minor(md, minor);
 	if (r < 0)
-		goto bad1;
+		goto bad_minor;
 
 	memset(md, 0, sizeof(*md));
 	init_rwsem(&md->io_lock);
-	init_MUTEX(&md->suspend_lock);
+	mutex_init(&md->suspend_lock);
 	spin_lock_init(&md->pushback_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
@@ -1006,7 +1024,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
-		goto bad1_free_minor;
+		goto bad_queue;
 
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
@@ -1017,11 +1035,11 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
 	if (!md->io_pool)
-		goto bad2;
+		goto bad_io_pool;
 
 	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
 	if (!md->tio_pool)
-		goto bad3;
+		goto bad_tio_pool;
 
 	md->bs = bioset_create(16, 16);
 	if (!md->bs)
@@ -1029,7 +1047,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
-		goto bad4;
+		goto bad_disk;
 
 	atomic_set(&md->pending, 0);
 	init_waitqueue_head(&md->wait);
@@ -1044,6 +1062,10 @@ static struct mapped_device *alloc_dev(int minor)
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
+	md->wq = create_singlethread_workqueue("kdmflush");
+	if (!md->wq)
+		goto bad_thread;
+
 	/* Populate the mapping, nobody knows we exist yet */
 	spin_lock(&_minor_lock);
 	old_md = idr_replace(&_minor_idr, md, minor);
@@ -1053,19 +1075,21 @@ static struct mapped_device *alloc_dev(int minor)
 
 	return md;
 
-bad4:
+bad_thread:
+	put_disk(md->disk);
+bad_disk:
 	bioset_free(md->bs);
 bad_no_bioset:
 	mempool_destroy(md->tio_pool);
-bad3:
+bad_tio_pool:
 	mempool_destroy(md->io_pool);
-bad2:
+bad_io_pool:
 	blk_cleanup_queue(md->queue);
-bad1_free_minor:
+bad_queue:
 	free_minor(minor);
-bad1:
+bad_minor:
 	module_put(THIS_MODULE);
-bad0:
+bad_module_get:
 	kfree(md);
 	return NULL;
 }
@@ -1080,6 +1104,7 @@ static void free_dev(struct mapped_device *md)
 		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
+	destroy_workqueue(md->wq);
 	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
 	bioset_free(md->bs);
@@ -1259,20 +1284,91 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+	int r = 0;
+
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		smp_mb();
+		if (!atomic_read(&md->pending))
+			break;
+
+		if (signal_pending(current)) {
+			r = -EINTR;
+			break;
+		}
+
+		io_schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	return r;
+}
+
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md)
 {
-	struct bio *n;
+	struct bio *c;
 
-	while (c) {
-		n = c->bi_next;
-		c->bi_next = NULL;
+	while ((c = bio_list_pop(&md->deferred))) {
 		if (__split_bio(md, c))
 			bio_io_error(c);
-		c = n;
 	}
+
+	clear_bit(DMF_BLOCK_IO, &md->flags);
+}
+
+static void __merge_pushback_list(struct mapped_device *md)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->pushback_lock, flags);
+	clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+	bio_list_merge_head(&md->deferred, &md->pushback);
+	bio_list_init(&md->pushback);
+	spin_unlock_irqrestore(&md->pushback_lock, flags);
+}
+
+static void dm_wq_work(struct work_struct *work)
+{
+	struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+	struct mapped_device *md = req->md;
+
+	down_write(&md->io_lock);
+	switch (req->type) {
+	case DM_WQ_FLUSH_ALL:
+		__merge_pushback_list(md);
+		/* pass through */
+	case DM_WQ_FLUSH_DEFERRED:
+		__flush_deferred_io(md);
+		break;
+	default:
+		DMERR("dm_wq_work: unrecognised work type %d", req->type);
+		BUG();
+	}
+	up_write(&md->io_lock);
+}
+
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+			struct dm_wq_req *req)
+{
+	req->type = type;
+	req->md = md;
+	req->context = context;
+	INIT_WORK(&req->work, dm_wq_work);
+	queue_work(md->wq, &req->work);
+}
+
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+	struct dm_wq_req req;
+
+	dm_wq_queue(md, type, context, &req);
+	flush_workqueue(md->wq);
 }
 
 /*
@@ -1282,7 +1378,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
 	int r = -EINVAL;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 
 	/* device must be suspended */
 	if (!dm_suspended(md))
@@ -1297,7 +1393,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		r = __bind(md, table);
 
 out:
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 	return r;
 }
 
@@ -1346,17 +1442,17 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
 	struct dm_table *map = NULL;
-	unsigned long flags;
 	DECLARE_WAITQUEUE(wait, current);
-	struct bio *def;
-	int r = -EINVAL;
+	int r = 0;
 	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
 	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 
-	if (dm_suspended(md))
+	if (dm_suspended(md)) {
+		r = -EINVAL;
 		goto out_unlock;
+	}
 
 	map = dm_get_table(md);
 
@@ -1378,16 +1474,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 			r = -ENOMEM;
 			goto flush_and_out;
 		}
-	}
 
 	/*
-	 * Flush I/O to the device.
-	 * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
+	 * Flush I/O to the device. noflush supersedes do_lockfs,
+	 * because lock_fs() needs to flush I/Os.
 	 */
-	if (do_lockfs && !noflush) {
+	if (do_lockfs) {
 		r = lock_fs(md);
 		if (r)
 			goto out;
+		}
 	}
 
 	/*
@@ -1404,66 +1500,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	dm_table_unplug_all(map);
 
 	/*
-	 * Then we wait for the already mapped ios to
-	 * complete.
+	 * Wait for the already-mapped ios to complete.
 	 */
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!atomic_read(&md->pending) || signal_pending(current))
-			break;
-
-		io_schedule();
-	}
-	set_current_state(TASK_RUNNING);
+	r = dm_wait_for_completion(md);
 
 	down_write(&md->io_lock);
 	remove_wait_queue(&md->wait, &wait);
 
-	if (noflush) {
-		spin_lock_irqsave(&md->pushback_lock, flags);
-		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-		bio_list_merge_head(&md->deferred, &md->pushback);
-		bio_list_init(&md->pushback);
-		spin_unlock_irqrestore(&md->pushback_lock, flags);
-	}
+	if (noflush)
+		__merge_pushback_list(md);
+	up_write(&md->io_lock);
 
 	/* were we interrupted ? */
-	r = -EINTR;
-	if (atomic_read(&md->pending)) {
-		clear_bit(DMF_BLOCK_IO, &md->flags);
-		def = bio_list_get(&md->deferred);
-		__flush_deferred_io(md, def);
-		up_write(&md->io_lock);
+	if (r < 0) {
+		dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
 	}
-	up_write(&md->io_lock);
 
 	dm_table_postsuspend_targets(map);
 
 	set_bit(DMF_SUSPENDED, &md->flags);
 
-	r = 0;
-
 flush_and_out:
-	if (r && noflush) {
+	if (r && noflush)
 		/*
 		 * Because there may be already I/Os in the pushback list,
 		 * flush them before return.
 		 */
-		down_write(&md->io_lock);
-
-		spin_lock_irqsave(&md->pushback_lock, flags);
-		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-		bio_list_merge_head(&md->deferred, &md->pushback);
-		bio_list_init(&md->pushback);
-		spin_unlock_irqrestore(&md->pushback_lock, flags);
-
-		def = bio_list_get(&md->deferred);
-		__flush_deferred_io(md, def);
-		up_write(&md->io_lock);
-	}
+		dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
 
 out:
 	if (r && md->suspended_bdev) {
@@ -1474,17 +1540,16 @@ out:
 	dm_table_put(map);
 
 out_unlock:
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 	return r;
 }
 
 int dm_resume(struct mapped_device *md)
 {
 	int r = -EINVAL;
-	struct bio *def;
 	struct dm_table *map = NULL;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 	if (!dm_suspended(md))
 		goto out;
 
@@ -1496,12 +1561,7 @@ int dm_resume(struct mapped_device *md)
 	if (r)
 		goto out;
 
-	down_write(&md->io_lock);
-	clear_bit(DMF_BLOCK_IO, &md->flags);
-
-	def = bio_list_get(&md->deferred);
-	__flush_deferred_io(md, def);
-	up_write(&md->io_lock);
+	dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
 	unlock_fs(md);
 
@@ -1520,7 +1580,7 @@ int dm_resume(struct mapped_device *md)
 
 out:
 	dm_table_put(map);
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 
 	return r;
 }