diff options
author | Alasdair G Kergon <agk@redhat.com> | 2005-07-29 00:16:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-07-29 00:46:03 -0400 |
commit | 2ca3310e78912a527d7d2c62c01da7cbf7346b8d (patch) | |
tree | 3eb83a509487d5b85d346630e9d700d73d28e39e /drivers | |
parent | 4e90188be4a56f37fbb4ffb5b58745683526dcb9 (diff) |
[PATCH] device-mapper: fix md->lock deadlocks in core
This patch is an attempt to fix deadlocks discovered in the core dm.
The problems boil down to md->lock having to be held in too many places, so
I've split it into two: md->suspend_lock and md->io_lock.
suspend_lock is now held throughout dm_suspended() as well as dm_resume()
and dm_swap_table() so that these functions cannot run concurrently:
there's no requirement for that and it added complexity.
DMF_FS_LOCKED becomes redundant: DMF_SUSPENDED provides adequate
protection.
Signed-Off-By: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/dm.c | 138 |
1 files changed, 60 insertions, 78 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 88f1f27526cc..d487d9deb98e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -55,10 +55,10 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
55 | */ | 55 | */ |
56 | #define DMF_BLOCK_IO 0 | 56 | #define DMF_BLOCK_IO 0 |
57 | #define DMF_SUSPENDED 1 | 57 | #define DMF_SUSPENDED 1 |
58 | #define DMF_FS_LOCKED 2 | ||
59 | 58 | ||
60 | struct mapped_device { | 59 | struct mapped_device { |
61 | struct rw_semaphore lock; | 60 | struct rw_semaphore io_lock; |
61 | struct semaphore suspend_lock; | ||
62 | rwlock_t map_lock; | 62 | rwlock_t map_lock; |
63 | atomic_t holders; | 63 | atomic_t holders; |
64 | 64 | ||
@@ -248,16 +248,16 @@ static inline void free_tio(struct mapped_device *md, struct target_io *tio) | |||
248 | */ | 248 | */ |
249 | static int queue_io(struct mapped_device *md, struct bio *bio) | 249 | static int queue_io(struct mapped_device *md, struct bio *bio) |
250 | { | 250 | { |
251 | down_write(&md->lock); | 251 | down_write(&md->io_lock); |
252 | 252 | ||
253 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | 253 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { |
254 | up_write(&md->lock); | 254 | up_write(&md->io_lock); |
255 | return 1; | 255 | return 1; |
256 | } | 256 | } |
257 | 257 | ||
258 | bio_list_add(&md->deferred, bio); | 258 | bio_list_add(&md->deferred, bio); |
259 | 259 | ||
260 | up_write(&md->lock); | 260 | up_write(&md->io_lock); |
261 | return 0; /* deferred successfully */ | 261 | return 0; /* deferred successfully */ |
262 | } | 262 | } |
263 | 263 | ||
@@ -568,14 +568,14 @@ static int dm_request(request_queue_t *q, struct bio *bio) | |||
568 | int r; | 568 | int r; |
569 | struct mapped_device *md = q->queuedata; | 569 | struct mapped_device *md = q->queuedata; |
570 | 570 | ||
571 | down_read(&md->lock); | 571 | down_read(&md->io_lock); |
572 | 572 | ||
573 | /* | 573 | /* |
574 | * If we're suspended we have to queue | 574 | * If we're suspended we have to queue |
575 | * this io for later. | 575 | * this io for later. |
576 | */ | 576 | */ |
577 | while (test_bit(DMF_BLOCK_IO, &md->flags)) { | 577 | while (test_bit(DMF_BLOCK_IO, &md->flags)) { |
578 | up_read(&md->lock); | 578 | up_read(&md->io_lock); |
579 | 579 | ||
580 | if (bio_rw(bio) == READA) { | 580 | if (bio_rw(bio) == READA) { |
581 | bio_io_error(bio, bio->bi_size); | 581 | bio_io_error(bio, bio->bi_size); |
@@ -594,11 +594,11 @@ static int dm_request(request_queue_t *q, struct bio *bio) | |||
594 | * We're in a while loop, because someone could suspend | 594 | * We're in a while loop, because someone could suspend |
595 | * before we get to the following read lock. | 595 | * before we get to the following read lock. |
596 | */ | 596 | */ |
597 | down_read(&md->lock); | 597 | down_read(&md->io_lock); |
598 | } | 598 | } |
599 | 599 | ||
600 | __split_bio(md, bio); | 600 | __split_bio(md, bio); |
601 | up_read(&md->lock); | 601 | up_read(&md->io_lock); |
602 | return 0; | 602 | return 0; |
603 | } | 603 | } |
604 | 604 | ||
@@ -747,7 +747,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) | |||
747 | goto bad1; | 747 | goto bad1; |
748 | 748 | ||
749 | memset(md, 0, sizeof(*md)); | 749 | memset(md, 0, sizeof(*md)); |
750 | init_rwsem(&md->lock); | 750 | init_rwsem(&md->io_lock); |
751 | init_MUTEX(&md->suspend_lock); | ||
751 | rwlock_init(&md->map_lock); | 752 | rwlock_init(&md->map_lock); |
752 | atomic_set(&md->holders, 1); | 753 | atomic_set(&md->holders, 1); |
753 | atomic_set(&md->event_nr, 0); | 754 | atomic_set(&md->event_nr, 0); |
@@ -844,13 +845,14 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
844 | if (size == 0) | 845 | if (size == 0) |
845 | return 0; | 846 | return 0; |
846 | 847 | ||
848 | dm_table_get(t); | ||
849 | dm_table_event_callback(t, event_callback, md); | ||
850 | |||
847 | write_lock(&md->map_lock); | 851 | write_lock(&md->map_lock); |
848 | md->map = t; | 852 | md->map = t; |
853 | dm_table_set_restrictions(t, q); | ||
849 | write_unlock(&md->map_lock); | 854 | write_unlock(&md->map_lock); |
850 | 855 | ||
851 | dm_table_get(t); | ||
852 | dm_table_event_callback(t, event_callback, md); | ||
853 | dm_table_set_restrictions(t, q); | ||
854 | return 0; | 856 | return 0; |
855 | } | 857 | } |
856 | 858 | ||
@@ -963,7 +965,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
963 | { | 965 | { |
964 | int r = -EINVAL; | 966 | int r = -EINVAL; |
965 | 967 | ||
966 | down_write(&md->lock); | 968 | down(&md->suspend_lock); |
967 | 969 | ||
968 | /* device must be suspended */ | 970 | /* device must be suspended */ |
969 | if (!dm_suspended(md)) | 971 | if (!dm_suspended(md)) |
@@ -973,7 +975,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
973 | r = __bind(md, table); | 975 | r = __bind(md, table); |
974 | 976 | ||
975 | out: | 977 | out: |
976 | up_write(&md->lock); | 978 | up(&md->suspend_lock); |
977 | return r; | 979 | return r; |
978 | } | 980 | } |
979 | 981 | ||
@@ -981,16 +983,13 @@ out: | |||
981 | * Functions to lock and unlock any filesystem running on the | 983 | * Functions to lock and unlock any filesystem running on the |
982 | * device. | 984 | * device. |
983 | */ | 985 | */ |
984 | static int __lock_fs(struct mapped_device *md) | 986 | static int lock_fs(struct mapped_device *md) |
985 | { | 987 | { |
986 | int r = -ENOMEM; | 988 | int r = -ENOMEM; |
987 | 989 | ||
988 | if (test_and_set_bit(DMF_FS_LOCKED, &md->flags)) | ||
989 | return 0; | ||
990 | |||
991 | md->frozen_bdev = bdget_disk(md->disk, 0); | 990 | md->frozen_bdev = bdget_disk(md->disk, 0); |
992 | if (!md->frozen_bdev) { | 991 | if (!md->frozen_bdev) { |
993 | DMWARN("bdget failed in __lock_fs"); | 992 | DMWARN("bdget failed in lock_fs"); |
994 | goto out; | 993 | goto out; |
995 | } | 994 | } |
996 | 995 | ||
@@ -1004,7 +1003,7 @@ static int __lock_fs(struct mapped_device *md) | |||
1004 | 1003 | ||
1005 | /* don't bdput right now, we don't want the bdev | 1004 | /* don't bdput right now, we don't want the bdev |
1006 | * to go away while it is locked. We'll bdput | 1005 | * to go away while it is locked. We'll bdput |
1007 | * in __unlock_fs | 1006 | * in unlock_fs |
1008 | */ | 1007 | */ |
1009 | return 0; | 1008 | return 0; |
1010 | 1009 | ||
@@ -1013,15 +1012,11 @@ out_bdput: | |||
1013 | md->frozen_sb = NULL; | 1012 | md->frozen_sb = NULL; |
1014 | md->frozen_bdev = NULL; | 1013 | md->frozen_bdev = NULL; |
1015 | out: | 1014 | out: |
1016 | clear_bit(DMF_FS_LOCKED, &md->flags); | ||
1017 | return r; | 1015 | return r; |
1018 | } | 1016 | } |
1019 | 1017 | ||
1020 | static void __unlock_fs(struct mapped_device *md) | 1018 | static void unlock_fs(struct mapped_device *md) |
1021 | { | 1019 | { |
1022 | if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags)) | ||
1023 | return; | ||
1024 | |||
1025 | thaw_bdev(md->frozen_bdev, md->frozen_sb); | 1020 | thaw_bdev(md->frozen_bdev, md->frozen_sb); |
1026 | bdput(md->frozen_bdev); | 1021 | bdput(md->frozen_bdev); |
1027 | 1022 | ||
@@ -1038,13 +1033,14 @@ static void __unlock_fs(struct mapped_device *md) | |||
1038 | */ | 1033 | */ |
1039 | int dm_suspend(struct mapped_device *md) | 1034 | int dm_suspend(struct mapped_device *md) |
1040 | { | 1035 | { |
1041 | struct dm_table *map; | 1036 | struct dm_table *map = NULL; |
1042 | DECLARE_WAITQUEUE(wait, current); | 1037 | DECLARE_WAITQUEUE(wait, current); |
1043 | int r = -EINVAL; | 1038 | int r = -EINVAL; |
1044 | 1039 | ||
1045 | down_read(&md->lock); | 1040 | down(&md->suspend_lock); |
1046 | if (test_bit(DMF_BLOCK_IO, &md->flags)) | 1041 | |
1047 | goto out_read_unlock; | 1042 | if (dm_suspended(md)) |
1043 | goto out; | ||
1048 | 1044 | ||
1049 | map = dm_get_table(md); | 1045 | map = dm_get_table(md); |
1050 | 1046 | ||
@@ -1052,36 +1048,22 @@ int dm_suspend(struct mapped_device *md) | |||
1052 | dm_table_presuspend_targets(map); | 1048 | dm_table_presuspend_targets(map); |
1053 | 1049 | ||
1054 | /* Flush I/O to the device. */ | 1050 | /* Flush I/O to the device. */ |
1055 | r = __lock_fs(md); | 1051 | r = lock_fs(md); |
1056 | if (r) { | 1052 | if (r) |
1057 | dm_table_put(map); | 1053 | goto out; |
1058 | goto out_read_unlock; | ||
1059 | } | ||
1060 | |||
1061 | up_read(&md->lock); | ||
1062 | 1054 | ||
1063 | /* | 1055 | /* |
1064 | * First we set the BLOCK_IO flag so no more ios will be mapped. | 1056 | * First we set the BLOCK_IO flag so no more ios will be mapped. |
1065 | * | ||
1066 | * If the flag is already set we know another thread is trying to | ||
1067 | * suspend as well, so we leave the fs locked for this thread. | ||
1068 | */ | 1057 | */ |
1069 | r = -EINVAL; | 1058 | down_write(&md->io_lock); |
1070 | down_write(&md->lock); | 1059 | set_bit(DMF_BLOCK_IO, &md->flags); |
1071 | if (test_and_set_bit(DMF_BLOCK_IO, &md->flags)) { | ||
1072 | if (map) | ||
1073 | dm_table_put(map); | ||
1074 | goto out_write_unlock; | ||
1075 | } | ||
1076 | 1060 | ||
1077 | add_wait_queue(&md->wait, &wait); | 1061 | add_wait_queue(&md->wait, &wait); |
1078 | up_write(&md->lock); | 1062 | up_write(&md->io_lock); |
1079 | 1063 | ||
1080 | /* unplug */ | 1064 | /* unplug */ |
1081 | if (map) { | 1065 | if (map) |
1082 | dm_table_unplug_all(map); | 1066 | dm_table_unplug_all(map); |
1083 | dm_table_put(map); | ||
1084 | } | ||
1085 | 1067 | ||
1086 | /* | 1068 | /* |
1087 | * Then we wait for the already mapped ios to | 1069 | * Then we wait for the already mapped ios to |
@@ -1097,32 +1079,28 @@ int dm_suspend(struct mapped_device *md) | |||
1097 | } | 1079 | } |
1098 | set_current_state(TASK_RUNNING); | 1080 | set_current_state(TASK_RUNNING); |
1099 | 1081 | ||
1100 | down_write(&md->lock); | 1082 | down_write(&md->io_lock); |
1101 | remove_wait_queue(&md->wait, &wait); | 1083 | remove_wait_queue(&md->wait, &wait); |
1102 | 1084 | ||
1103 | /* were we interrupted ? */ | 1085 | /* were we interrupted ? */ |
1104 | r = -EINTR; | 1086 | r = -EINTR; |
1105 | if (atomic_read(&md->pending)) | 1087 | if (atomic_read(&md->pending)) { |
1106 | goto out_unfreeze; | 1088 | up_write(&md->io_lock); |
1107 | 1089 | unlock_fs(md); | |
1108 | set_bit(DMF_SUSPENDED, &md->flags); | 1090 | clear_bit(DMF_BLOCK_IO, &md->flags); |
1091 | goto out; | ||
1092 | } | ||
1093 | up_write(&md->io_lock); | ||
1109 | 1094 | ||
1110 | map = dm_get_table(md); | ||
1111 | dm_table_postsuspend_targets(map); | 1095 | dm_table_postsuspend_targets(map); |
1112 | dm_table_put(map); | ||
1113 | up_write(&md->lock); | ||
1114 | 1096 | ||
1115 | return 0; | 1097 | set_bit(DMF_SUSPENDED, &md->flags); |
1116 | 1098 | ||
1117 | out_unfreeze: | 1099 | r = 0; |
1118 | __unlock_fs(md); | ||
1119 | clear_bit(DMF_BLOCK_IO, &md->flags); | ||
1120 | out_write_unlock: | ||
1121 | up_write(&md->lock); | ||
1122 | return r; | ||
1123 | 1100 | ||
1124 | out_read_unlock: | 1101 | out: |
1125 | up_read(&md->lock); | 1102 | dm_table_put(map); |
1103 | up(&md->suspend_lock); | ||
1126 | return r; | 1104 | return r; |
1127 | } | 1105 | } |
1128 | 1106 | ||
@@ -1132,31 +1110,35 @@ int dm_resume(struct mapped_device *md) | |||
1132 | struct bio *def; | 1110 | struct bio *def; |
1133 | struct dm_table *map = NULL; | 1111 | struct dm_table *map = NULL; |
1134 | 1112 | ||
1135 | down_write(&md->lock); | 1113 | down(&md->suspend_lock); |
1136 | if (!dm_suspended(md)) { | 1114 | if (!dm_suspended(md)) |
1137 | up_write(&md->lock); | ||
1138 | goto out; | 1115 | goto out; |
1139 | } | ||
1140 | 1116 | ||
1141 | map = dm_get_table(md); | 1117 | map = dm_get_table(md); |
1142 | if (!map || !dm_table_get_size(map)) { | 1118 | if (!map || !dm_table_get_size(map)) |
1143 | up_write(&md->lock); | ||
1144 | goto out; | 1119 | goto out; |
1145 | } | ||
1146 | 1120 | ||
1147 | dm_table_resume_targets(map); | 1121 | dm_table_resume_targets(map); |
1148 | clear_bit(DMF_SUSPENDED, &md->flags); | 1122 | |
1123 | down_write(&md->io_lock); | ||
1149 | clear_bit(DMF_BLOCK_IO, &md->flags); | 1124 | clear_bit(DMF_BLOCK_IO, &md->flags); |
1150 | 1125 | ||
1151 | def = bio_list_get(&md->deferred); | 1126 | def = bio_list_get(&md->deferred); |
1152 | __flush_deferred_io(md, def); | 1127 | __flush_deferred_io(md, def); |
1153 | up_write(&md->lock); | 1128 | up_write(&md->io_lock); |
1154 | __unlock_fs(md); | 1129 | |
1130 | unlock_fs(md); | ||
1131 | |||
1132 | clear_bit(DMF_SUSPENDED, &md->flags); | ||
1133 | |||
1155 | dm_table_unplug_all(map); | 1134 | dm_table_unplug_all(map); |
1156 | 1135 | ||
1157 | r = 0; | 1136 | r = 0; |
1137 | |||
1158 | out: | 1138 | out: |
1159 | dm_table_put(map); | 1139 | dm_table_put(map); |
1140 | up(&md->suspend_lock); | ||
1141 | |||
1160 | return r; | 1142 | return r; |
1161 | } | 1143 | } |
1162 | 1144 | ||