Diffstat (limited to 'drivers/md/dm.c')
 -rw-r--r--	drivers/md/dm.c	238
 1 file changed, 149 insertions(+), 89 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f2d24eb3208c..6617ce4af095 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -71,9 +71,22 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * Work processed by per-device workqueue.
+ */
+struct dm_wq_req {
+	enum {
+		DM_WQ_FLUSH_ALL,
+		DM_WQ_FLUSH_DEFERRED,
+	} type;
+	struct work_struct work;
+	struct mapped_device *md;
+	void *context;
+};
+
 struct mapped_device {
 	struct rw_semaphore io_lock;
-	struct semaphore suspend_lock;
+	struct mutex suspend_lock;
 	spinlock_t pushback_lock;
 	rwlock_t map_lock;
 	atomic_t holders;
@@ -96,6 +109,11 @@ struct mapped_device {
 	struct bio_list pushback;
 
 	/*
+	 * Processing queue (flush/barriers)
+	 */
+	struct workqueue_struct *wq;
+
+	/*
 	 * The current mapping.
 	 */
 	struct dm_table *map;
@@ -181,7 +199,7 @@ static void local_exit(void)
 	DMINFO("cleaned up");
 }
 
-int (*_inits[])(void) __initdata = {
+static int (*_inits[])(void) __initdata = {
 	local_init,
 	dm_target_init,
 	dm_linear_init,
@@ -189,7 +207,7 @@ int (*_inits[])(void) __initdata = {
 	dm_interface_init,
 };
 
-void (*_exits[])(void) = {
+static void (*_exits[])(void) = {
 	local_exit,
 	dm_target_exit,
 	dm_linear_exit,
@@ -982,7 +1000,7 @@ static struct mapped_device *alloc_dev(int minor)
 	}
 
 	if (!try_module_get(THIS_MODULE))
-		goto bad0;
+		goto bad_module_get;
 
 	/* get a minor number for the dev */
 	if (minor == DM_ANY_MINOR)
@@ -990,11 +1008,11 @@ static struct mapped_device *alloc_dev(int minor)
 	else
 		r = specific_minor(md, minor);
 	if (r < 0)
-		goto bad1;
+		goto bad_minor;
 
 	memset(md, 0, sizeof(*md));
 	init_rwsem(&md->io_lock);
-	init_MUTEX(&md->suspend_lock);
+	mutex_init(&md->suspend_lock);
 	spin_lock_init(&md->pushback_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
@@ -1006,7 +1024,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
-		goto bad1_free_minor;
+		goto bad_queue;
 
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
@@ -1017,11 +1035,11 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
 	if (!md->io_pool)
-		goto bad2;
+		goto bad_io_pool;
 
 	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
 	if (!md->tio_pool)
-		goto bad3;
+		goto bad_tio_pool;
 
 	md->bs = bioset_create(16, 16);
 	if (!md->bs)
@@ -1029,7 +1047,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
-		goto bad4;
+		goto bad_disk;
 
 	atomic_set(&md->pending, 0);
 	init_waitqueue_head(&md->wait);
@@ -1044,6 +1062,10 @@ static struct mapped_device *alloc_dev(int minor)
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
+	md->wq = create_singlethread_workqueue("kdmflush");
+	if (!md->wq)
+		goto bad_thread;
+
 	/* Populate the mapping, nobody knows we exist yet */
 	spin_lock(&_minor_lock);
 	old_md = idr_replace(&_minor_idr, md, minor);
@@ -1053,19 +1075,21 @@ static struct mapped_device *alloc_dev(int minor)
 
 	return md;
 
- bad4:
+bad_thread:
+	put_disk(md->disk);
+bad_disk:
 	bioset_free(md->bs);
- bad_no_bioset:
+bad_no_bioset:
 	mempool_destroy(md->tio_pool);
- bad3:
+bad_tio_pool:
 	mempool_destroy(md->io_pool);
- bad2:
+bad_io_pool:
 	blk_cleanup_queue(md->queue);
- bad1_free_minor:
+bad_queue:
 	free_minor(minor);
- bad1:
+bad_minor:
 	module_put(THIS_MODULE);
- bad0:
+bad_module_get:
 	kfree(md);
 	return NULL;
 }
@@ -1080,6 +1104,7 @@ static void free_dev(struct mapped_device *md)
 		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
+	destroy_workqueue(md->wq);
 	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
 	bioset_free(md->bs);
@@ -1259,20 +1284,91 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+	int r = 0;
+
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		smp_mb();
+		if (!atomic_read(&md->pending))
+			break;
+
+		if (signal_pending(current)) {
+			r = -EINTR;
+			break;
+		}
+
+		io_schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	return r;
+}
+
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md)
 {
-	struct bio *n;
+	struct bio *c;
 
-	while (c) {
-		n = c->bi_next;
-		c->bi_next = NULL;
+	while ((c = bio_list_pop(&md->deferred))) {
 		if (__split_bio(md, c))
 			bio_io_error(c);
-		c = n;
 	}
+
+	clear_bit(DMF_BLOCK_IO, &md->flags);
+}
+
+static void __merge_pushback_list(struct mapped_device *md)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->pushback_lock, flags);
+	clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+	bio_list_merge_head(&md->deferred, &md->pushback);
+	bio_list_init(&md->pushback);
+	spin_unlock_irqrestore(&md->pushback_lock, flags);
+}
+
+static void dm_wq_work(struct work_struct *work)
+{
+	struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+	struct mapped_device *md = req->md;
+
+	down_write(&md->io_lock);
+	switch (req->type) {
+	case DM_WQ_FLUSH_ALL:
+		__merge_pushback_list(md);
+		/* pass through */
+	case DM_WQ_FLUSH_DEFERRED:
+		__flush_deferred_io(md);
+		break;
+	default:
+		DMERR("dm_wq_work: unrecognised work type %d", req->type);
+		BUG();
+	}
+	up_write(&md->io_lock);
+}
+
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+			struct dm_wq_req *req)
+{
+	req->type = type;
+	req->md = md;
+	req->context = context;
+	INIT_WORK(&req->work, dm_wq_work);
+	queue_work(md->wq, &req->work);
+}
+
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+	struct dm_wq_req req;
+
+	dm_wq_queue(md, type, context, &req);
+	flush_workqueue(md->wq);
 }
 
 /*
@@ -1282,7 +1378,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
 	int r = -EINVAL;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 
 	/* device must be suspended */
 	if (!dm_suspended(md))
@@ -1297,7 +1393,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	r = __bind(md, table);
 
 out:
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 	return r;
 }
 
@@ -1346,17 +1442,17 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
 	struct dm_table *map = NULL;
-	unsigned long flags;
 	DECLARE_WAITQUEUE(wait, current);
-	struct bio *def;
-	int r = -EINVAL;
+	int r = 0;
 	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
 	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 
-	if (dm_suspended(md))
+	if (dm_suspended(md)) {
+		r = -EINVAL;
 		goto out_unlock;
+	}
 
 	map = dm_get_table(md);
 
@@ -1378,16 +1474,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 			r = -ENOMEM;
 			goto flush_and_out;
 		}
-	}
 
-	/*
-	 * Flush I/O to the device.
-	 * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
-	 */
-	if (do_lockfs && !noflush) {
-		r = lock_fs(md);
-		if (r)
-			goto out;
+		/*
+		 * Flush I/O to the device. noflush supersedes do_lockfs,
+		 * because lock_fs() needs to flush I/Os.
+		 */
+		if (do_lockfs) {
+			r = lock_fs(md);
+			if (r)
+				goto out;
+		}
 	}
 
 	/*
@@ -1404,66 +1500,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	dm_table_unplug_all(map);
 
 	/*
-	 * Then we wait for the already mapped ios to
-	 * complete.
+	 * Wait for the already-mapped ios to complete.
 	 */
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!atomic_read(&md->pending) || signal_pending(current))
-			break;
-
-		io_schedule();
-	}
-	set_current_state(TASK_RUNNING);
+	r = dm_wait_for_completion(md);
 
 	down_write(&md->io_lock);
 	remove_wait_queue(&md->wait, &wait);
 
-	if (noflush) {
-		spin_lock_irqsave(&md->pushback_lock, flags);
-		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-		bio_list_merge_head(&md->deferred, &md->pushback);
-		bio_list_init(&md->pushback);
-		spin_unlock_irqrestore(&md->pushback_lock, flags);
-	}
+	if (noflush)
+		__merge_pushback_list(md);
+	up_write(&md->io_lock);
 
 	/* were we interrupted ? */
-	r = -EINTR;
-	if (atomic_read(&md->pending)) {
-		clear_bit(DMF_BLOCK_IO, &md->flags);
-		def = bio_list_get(&md->deferred);
-		__flush_deferred_io(md, def);
-		up_write(&md->io_lock);
+	if (r < 0) {
+		dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
 	}
-	up_write(&md->io_lock);
 
 	dm_table_postsuspend_targets(map);
 
 	set_bit(DMF_SUSPENDED, &md->flags);
 
-	r = 0;
-
 flush_and_out:
-	if (r && noflush) {
+	if (r && noflush)
 		/*
 		 * Because there may be already I/Os in the pushback list,
 		 * flush them before return.
 		 */
-		down_write(&md->io_lock);
-
-		spin_lock_irqsave(&md->pushback_lock, flags);
-		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-		bio_list_merge_head(&md->deferred, &md->pushback);
-		bio_list_init(&md->pushback);
-		spin_unlock_irqrestore(&md->pushback_lock, flags);
-
-		def = bio_list_get(&md->deferred);
-		__flush_deferred_io(md, def);
-		up_write(&md->io_lock);
-	}
+		dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
 
 out:
 	if (r && md->suspended_bdev) {
@@ -1474,17 +1540,16 @@ out:
 	dm_table_put(map);
 
 out_unlock:
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 	return r;
 }
 
 int dm_resume(struct mapped_device *md)
 {
 	int r = -EINVAL;
-	struct bio *def;
 	struct dm_table *map = NULL;
 
-	down(&md->suspend_lock);
+	mutex_lock(&md->suspend_lock);
 	if (!dm_suspended(md))
 		goto out;
 
@@ -1496,12 +1561,7 @@ int dm_resume(struct mapped_device *md)
 	if (r)
 		goto out;
 
-	down_write(&md->io_lock);
-	clear_bit(DMF_BLOCK_IO, &md->flags);
-
-	def = bio_list_get(&md->deferred);
-	__flush_deferred_io(md, def);
-	up_write(&md->io_lock);
+	dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
 	unlock_fs(md);
 
@@ -1520,7 +1580,7 @@ int dm_resume(struct mapped_device *md)
 
 out:
 	dm_table_put(map);
-	up(&md->suspend_lock);
+	mutex_unlock(&md->suspend_lock);
 
 	return r;
 }
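
Editor's note: the central idiom this patch introduces in dm_queue_flush() is queueing a stack-allocated work item on the per-device singlethreaded workqueue and then calling flush_workqueue() to wait for it, which is what makes the on-stack struct dm_wq_req safe. Below is a minimal, hypothetical sketch of that same pattern in isolation; the names (demo_wq, demo_req, demo_queue_and_wait) are illustrative only and are not part of the patch or the device-mapper API.

```c
/*
 * Sketch (not part of the patch): queue an on-stack work item on a
 * singlethreaded workqueue and use flush_workqueue() to wait for it,
 * mirroring the dm_wq_queue()/dm_queue_flush() pattern above.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

struct demo_req {
	int type;			/* what the worker should do */
	struct work_struct work;	/* embedded work item */
};

static struct workqueue_struct *demo_wq;

static void demo_work_fn(struct work_struct *work)
{
	struct demo_req *req = container_of(work, struct demo_req, work);

	pr_info("demo: handling request type %d\n", req->type);
}

/* Queue one request and wait until the workqueue has processed it. */
static void demo_queue_and_wait(int type)
{
	struct demo_req req;		/* lives on the caller's stack */

	req.type = type;
	INIT_WORK(&req.work, demo_work_fn);
	queue_work(demo_wq, &req.work);
	flush_workqueue(demo_wq);	/* req is no longer referenced after this */
}

static int __init demo_init(void)
{
	demo_wq = create_singlethread_workqueue("demo_flush");
	if (!demo_wq)
		return -ENOMEM;

	demo_queue_and_wait(1);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```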