author     Linus Torvalds <torvalds@linux-foundation.org>    2011-01-13 20:30:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-01-13 20:30:47 -0500
commit     f6bcfd94c0a97c11ce9197ade93a08bc8af6e057 (patch)
tree       83d867565b4f2a7627b3288f9e000eaf2b217be9 /drivers/md
parent     509e4aef44eb10e4aef1f81c3c3ff1214671503b (diff)
parent     9d09e663d5502c46f2d9481c04c1087e1c2da698 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (32 commits)
dm: raid456 basic support
dm: per target unplug callback support
dm: introduce target callbacks and congestion callback
dm mpath: delay activate_path retry on SCSI_DH_RETRY
dm: remove superfluous irq disablement in dm_request_fn
dm log: use PTR_ERR value instead of ENOMEM
dm snapshot: avoid storing private suspended state
dm snapshot: persistent make metadata_wq multithreaded
dm: use non reentrant workqueues if equivalent
dm: convert workqueues to alloc_ordered
dm stripe: switch from local workqueue to system_wq
dm: dont use flush_scheduled_work
dm snapshot: remove unused dm_snapshot queued_bios_work
dm ioctl: suppress needless warning messages
dm crypt: add loop aes iv generator
dm crypt: add multi key capability
dm crypt: add post iv call to iv generator
dm crypt: use io thread for reads only if mempool exhausted
dm crypt: scale to multiple cpus
dm crypt: simplify compatible table output
...
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig                        24
-rw-r--r--  drivers/md/Makefile                        1
-rw-r--r--  drivers/md/dm-crypt.c                    618
-rw-r--r--  drivers/md/dm-delay.c                      2
-rw-r--r--  drivers/md/dm-ioctl.c                    111
-rw-r--r--  drivers/md/dm-kcopyd.c                    57
-rw-r--r--  drivers/md/dm-log-userspace-base.c       139
-rw-r--r--  drivers/md/dm-log-userspace-transfer.c     1
-rw-r--r--  drivers/md/dm-log.c                        2
-rw-r--r--  drivers/md/dm-mpath.c                     67
-rw-r--r--  drivers/md/dm-raid.c                     697
-rw-r--r--  drivers/md/dm-raid1.c                     19
-rw-r--r--  drivers/md/dm-snap-persistent.c            4
-rw-r--r--  drivers/md/dm-snap.c                      62
-rw-r--r--  drivers/md/dm-stripe.c                    27
-rw-r--r--  drivers/md/dm-table.c                     19
-rw-r--r--  drivers/md/dm.c                           23
17 files changed, 1581 insertions, 292 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index bf1a95e31559..98d9ec85e0eb 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -240,6 +240,30 @@ config DM_MIRROR | |||
240 | Allow volume managers to mirror logical volumes, also | 240 | Allow volume managers to mirror logical volumes, also |
241 | needed for live data migration tools such as 'pvmove'. | 241 | needed for live data migration tools such as 'pvmove'. |
242 | 242 | ||
243 | config DM_RAID | ||
244 | tristate "RAID 4/5/6 target (EXPERIMENTAL)" | ||
245 | depends on BLK_DEV_DM && EXPERIMENTAL | ||
246 | select MD_RAID456 | ||
247 | select BLK_DEV_MD | ||
248 | ---help--- | ||
249 | A dm target that supports RAID4, RAID5 and RAID6 mappings | ||
250 | |||
251 | A RAID-5 set of N drives with a capacity of C MB per drive provides | ||
252 | the capacity of C * (N - 1) MB, and protects against a failure | ||
253 | of a single drive. For a given sector (row) number, (N - 1) drives | ||
254 | contain data sectors, and one drive contains the parity protection. | ||
255 | For a RAID-4 set, the parity blocks are present on a single drive, | ||
256 | while a RAID-5 set distributes the parity across the drives in one | ||
257 | of the available parity distribution methods. | ||
258 | |||
259 | A RAID-6 set of N drives with a capacity of C MB per drive | ||
260 | provides the capacity of C * (N - 2) MB, and protects | ||
261 | against a failure of any two drives. For a given sector | ||
262 | (row) number, (N - 2) drives contain data sectors, and two | ||
263 | drives contains two independent redundancy syndromes. Like | ||
264 | RAID-5, RAID-6 distributes the syndromes across the drives | ||
265 | in one of the available parity distribution methods. | ||
266 | |||
243 | config DM_LOG_USERSPACE | 267 | config DM_LOG_USERSPACE |
244 | tristate "Mirror userspace logging (EXPERIMENTAL)" | 268 | tristate "Mirror userspace logging (EXPERIMENTAL)" |
245 | depends on DM_MIRROR && EXPERIMENTAL && NET | 269 | depends on DM_MIRROR && EXPERIMENTAL && NET |
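The DM_RAID help text above states the usable-capacity formulas for RAID-4/5 (C * (N - 1)) and RAID-6 (C * (N - 2)). As a quick illustration of that arithmetic only, here is a small standalone sketch; the 5-drive, 500 MB-per-drive set is an assumed example and is not taken from the patch.

/* Illustration only: evaluates the capacity formulas quoted in the
 * DM_RAID help text for an assumed 5-drive, 500 MB-per-drive set.
 */
#include <stdio.h>

int main(void)
{
	unsigned int n = 5, c_mb = 500;		/* assumed example values */

	/* RAID-4/5: one drive's worth of parity, so usable space is C * (N - 1) */
	printf("raid4/5 usable: %u MB\n", c_mb * (n - 1));	/* 2000 MB */

	/* RAID-6: two drives' worth of syndromes, so usable space is C * (N - 2) */
	printf("raid6 usable:   %u MB\n", c_mb * (n - 2));	/* 1500 MB */

	return 0;
}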
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 5e3aac41919d..d0138606c2e8 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | |||
36 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | 36 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o |
37 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o | 37 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o |
38 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 38 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
39 | obj-$(CONFIG_DM_RAID) += dm-raid.o | ||
39 | 40 | ||
40 | ifeq ($(CONFIG_DM_UEVENT),y) | 41 | ifeq ($(CONFIG_DM_UEVENT),y) |
41 | dm-mod-objs += dm-uevent.o | 42 | dm-mod-objs += dm-uevent.o |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d5b0e4c0e702..4e054bd91664 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,10 +18,14 @@ | |||
18 | #include <linux/crypto.h> | 18 | #include <linux/crypto.h> |
19 | #include <linux/workqueue.h> | 19 | #include <linux/workqueue.h> |
20 | #include <linux/backing-dev.h> | 20 | #include <linux/backing-dev.h> |
21 | #include <linux/percpu.h> | ||
21 | #include <asm/atomic.h> | 22 | #include <asm/atomic.h> |
22 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
23 | #include <asm/page.h> | 24 | #include <asm/page.h> |
24 | #include <asm/unaligned.h> | 25 | #include <asm/unaligned.h> |
26 | #include <crypto/hash.h> | ||
27 | #include <crypto/md5.h> | ||
28 | #include <crypto/algapi.h> | ||
25 | 29 | ||
26 | #include <linux/device-mapper.h> | 30 | #include <linux/device-mapper.h> |
27 | 31 | ||
@@ -63,6 +67,7 @@ struct dm_crypt_request { | |||
63 | struct convert_context *ctx; | 67 | struct convert_context *ctx; |
64 | struct scatterlist sg_in; | 68 | struct scatterlist sg_in; |
65 | struct scatterlist sg_out; | 69 | struct scatterlist sg_out; |
70 | sector_t iv_sector; | ||
66 | }; | 71 | }; |
67 | 72 | ||
68 | struct crypt_config; | 73 | struct crypt_config; |
@@ -73,11 +78,13 @@ struct crypt_iv_operations { | |||
73 | void (*dtr)(struct crypt_config *cc); | 78 | void (*dtr)(struct crypt_config *cc); |
74 | int (*init)(struct crypt_config *cc); | 79 | int (*init)(struct crypt_config *cc); |
75 | int (*wipe)(struct crypt_config *cc); | 80 | int (*wipe)(struct crypt_config *cc); |
76 | int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector); | 81 | int (*generator)(struct crypt_config *cc, u8 *iv, |
82 | struct dm_crypt_request *dmreq); | ||
83 | int (*post)(struct crypt_config *cc, u8 *iv, | ||
84 | struct dm_crypt_request *dmreq); | ||
77 | }; | 85 | }; |
78 | 86 | ||
79 | struct iv_essiv_private { | 87 | struct iv_essiv_private { |
80 | struct crypto_cipher *tfm; | ||
81 | struct crypto_hash *hash_tfm; | 88 | struct crypto_hash *hash_tfm; |
82 | u8 *salt; | 89 | u8 *salt; |
83 | }; | 90 | }; |
@@ -86,11 +93,32 @@ struct iv_benbi_private { | |||
86 | int shift; | 93 | int shift; |
87 | }; | 94 | }; |
88 | 95 | ||
96 | #define LMK_SEED_SIZE 64 /* hash + 0 */ | ||
97 | struct iv_lmk_private { | ||
98 | struct crypto_shash *hash_tfm; | ||
99 | u8 *seed; | ||
100 | }; | ||
101 | |||
89 | /* | 102 | /* |
90 | * Crypt: maps a linear range of a block device | 103 | * Crypt: maps a linear range of a block device |
91 | * and encrypts / decrypts at the same time. | 104 | * and encrypts / decrypts at the same time. |
92 | */ | 105 | */ |
93 | enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; | 106 | enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; |
107 | |||
108 | /* | ||
109 | * Duplicated per-CPU state for cipher. | ||
110 | */ | ||
111 | struct crypt_cpu { | ||
112 | struct ablkcipher_request *req; | ||
113 | /* ESSIV: struct crypto_cipher *essiv_tfm */ | ||
114 | void *iv_private; | ||
115 | struct crypto_ablkcipher *tfms[0]; | ||
116 | }; | ||
117 | |||
118 | /* | ||
119 | * The fields in here must be read only after initialization, | ||
120 | * changing state should be in crypt_cpu. | ||
121 | */ | ||
94 | struct crypt_config { | 122 | struct crypt_config { |
95 | struct dm_dev *dev; | 123 | struct dm_dev *dev; |
96 | sector_t start; | 124 | sector_t start; |
@@ -108,17 +136,25 @@ struct crypt_config { | |||
108 | struct workqueue_struct *crypt_queue; | 136 | struct workqueue_struct *crypt_queue; |
109 | 137 | ||
110 | char *cipher; | 138 | char *cipher; |
111 | char *cipher_mode; | 139 | char *cipher_string; |
112 | 140 | ||
113 | struct crypt_iv_operations *iv_gen_ops; | 141 | struct crypt_iv_operations *iv_gen_ops; |
114 | union { | 142 | union { |
115 | struct iv_essiv_private essiv; | 143 | struct iv_essiv_private essiv; |
116 | struct iv_benbi_private benbi; | 144 | struct iv_benbi_private benbi; |
145 | struct iv_lmk_private lmk; | ||
117 | } iv_gen_private; | 146 | } iv_gen_private; |
118 | sector_t iv_offset; | 147 | sector_t iv_offset; |
119 | unsigned int iv_size; | 148 | unsigned int iv_size; |
120 | 149 | ||
121 | /* | 150 | /* |
151 | * Duplicated per cpu state. Access through | ||
152 | * per_cpu_ptr() only. | ||
153 | */ | ||
154 | struct crypt_cpu __percpu *cpu; | ||
155 | unsigned tfms_count; | ||
156 | |||
157 | /* | ||
122 | * Layout of each crypto request: | 158 | * Layout of each crypto request: |
123 | * | 159 | * |
124 | * struct ablkcipher_request | 160 | * struct ablkcipher_request |
@@ -132,11 +168,10 @@ struct crypt_config { | |||
132 | * correctly aligned. | 168 | * correctly aligned. |
133 | */ | 169 | */ |
134 | unsigned int dmreq_start; | 170 | unsigned int dmreq_start; |
135 | struct ablkcipher_request *req; | ||
136 | 171 | ||
137 | struct crypto_ablkcipher *tfm; | ||
138 | unsigned long flags; | 172 | unsigned long flags; |
139 | unsigned int key_size; | 173 | unsigned int key_size; |
174 | unsigned int key_parts; | ||
140 | u8 key[0]; | 175 | u8 key[0]; |
141 | }; | 176 | }; |
142 | 177 | ||
@@ -148,6 +183,20 @@ static struct kmem_cache *_crypt_io_pool; | |||
148 | 183 | ||
149 | static void clone_init(struct dm_crypt_io *, struct bio *); | 184 | static void clone_init(struct dm_crypt_io *, struct bio *); |
150 | static void kcryptd_queue_crypt(struct dm_crypt_io *io); | 185 | static void kcryptd_queue_crypt(struct dm_crypt_io *io); |
186 | static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); | ||
187 | |||
188 | static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) | ||
189 | { | ||
190 | return this_cpu_ptr(cc->cpu); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Use this to access cipher attributes that are the same for each CPU. | ||
195 | */ | ||
196 | static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) | ||
197 | { | ||
198 | return __this_cpu_ptr(cc->cpu)->tfms[0]; | ||
199 | } | ||
151 | 200 | ||
152 | /* | 201 | /* |
153 | * Different IV generation algorithms: | 202 | * Different IV generation algorithms: |
@@ -168,23 +217,38 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io); | |||
168 | * null: the initial vector is always zero. Provides compatibility with | 217 | * null: the initial vector is always zero. Provides compatibility with |
169 | * obsolete loop_fish2 devices. Do not use for new devices. | 218 | * obsolete loop_fish2 devices. Do not use for new devices. |
170 | * | 219 | * |
220 | * lmk: Compatible implementation of the block chaining mode used | ||
221 | * by the Loop-AES block device encryption system | ||
222 | * designed by Jari Ruusu. See http://loop-aes.sourceforge.net/ | ||
223 | * It operates on full 512 byte sectors and uses CBC | ||
224 | * with an IV derived from the sector number, the data and | ||
225 | * optionally extra IV seed. | ||
226 | * This means that after decryption the first block | ||
227 | * of sector must be tweaked according to decrypted data. | ||
228 | * Loop-AES can use three encryption schemes: | ||
229 | * version 1: is plain aes-cbc mode | ||
230 | * version 2: uses 64 multikey scheme with lmk IV generator | ||
231 | * version 3: the same as version 2 with additional IV seed | ||
232 | * (it uses 65 keys, last key is used as IV seed) | ||
233 | * | ||
171 | * plumb: unimplemented, see: | 234 | * plumb: unimplemented, see: |
172 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 | 235 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 |
173 | */ | 236 | */ |
174 | 237 | ||
175 | static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | 238 | static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, |
239 | struct dm_crypt_request *dmreq) | ||
176 | { | 240 | { |
177 | memset(iv, 0, cc->iv_size); | 241 | memset(iv, 0, cc->iv_size); |
178 | *(u32 *)iv = cpu_to_le32(sector & 0xffffffff); | 242 | *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); |
179 | 243 | ||
180 | return 0; | 244 | return 0; |
181 | } | 245 | } |
182 | 246 | ||
183 | static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, | 247 | static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, |
184 | sector_t sector) | 248 | struct dm_crypt_request *dmreq) |
185 | { | 249 | { |
186 | memset(iv, 0, cc->iv_size); | 250 | memset(iv, 0, cc->iv_size); |
187 | *(u64 *)iv = cpu_to_le64(sector); | 251 | *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); |
188 | 252 | ||
189 | return 0; | 253 | return 0; |
190 | } | 254 | } |
@@ -195,7 +259,8 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) | |||
195 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; | 259 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; |
196 | struct hash_desc desc; | 260 | struct hash_desc desc; |
197 | struct scatterlist sg; | 261 | struct scatterlist sg; |
198 | int err; | 262 | struct crypto_cipher *essiv_tfm; |
263 | int err, cpu; | ||
199 | 264 | ||
200 | sg_init_one(&sg, cc->key, cc->key_size); | 265 | sg_init_one(&sg, cc->key, cc->key_size); |
201 | desc.tfm = essiv->hash_tfm; | 266 | desc.tfm = essiv->hash_tfm; |
@@ -205,8 +270,16 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) | |||
205 | if (err) | 270 | if (err) |
206 | return err; | 271 | return err; |
207 | 272 | ||
208 | return crypto_cipher_setkey(essiv->tfm, essiv->salt, | 273 | for_each_possible_cpu(cpu) { |
274 | essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, | ||
275 | |||
276 | err = crypto_cipher_setkey(essiv_tfm, essiv->salt, | ||
209 | crypto_hash_digestsize(essiv->hash_tfm)); | 277 | crypto_hash_digestsize(essiv->hash_tfm)); |
278 | if (err) | ||
279 | return err; | ||
280 | } | ||
281 | |||
282 | return 0; | ||
210 | } | 283 | } |
211 | 284 | ||
212 | /* Wipe salt and reset key derived from volume key */ | 285 | /* Wipe salt and reset key derived from volume key */ |
@@ -214,24 +287,76 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) | |||
214 | { | 287 | { |
215 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; | 288 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; |
216 | unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); | 289 | unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); |
290 | struct crypto_cipher *essiv_tfm; | ||
291 | int cpu, r, err = 0; | ||
217 | 292 | ||
218 | memset(essiv->salt, 0, salt_size); | 293 | memset(essiv->salt, 0, salt_size); |
219 | 294 | ||
220 | return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size); | 295 | for_each_possible_cpu(cpu) { |
296 | essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; | ||
297 | r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); | ||
298 | if (r) | ||
299 | err = r; | ||
300 | } | ||
301 | |||
302 | return err; | ||
303 | } | ||
304 | |||
305 | /* Set up per cpu cipher state */ | ||
306 | static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, | ||
307 | struct dm_target *ti, | ||
308 | u8 *salt, unsigned saltsize) | ||
309 | { | ||
310 | struct crypto_cipher *essiv_tfm; | ||
311 | int err; | ||
312 | |||
313 | /* Setup the essiv_tfm with the given salt */ | ||
314 | essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); | ||
315 | if (IS_ERR(essiv_tfm)) { | ||
316 | ti->error = "Error allocating crypto tfm for ESSIV"; | ||
317 | return essiv_tfm; | ||
318 | } | ||
319 | |||
320 | if (crypto_cipher_blocksize(essiv_tfm) != | ||
321 | crypto_ablkcipher_ivsize(any_tfm(cc))) { | ||
322 | ti->error = "Block size of ESSIV cipher does " | ||
323 | "not match IV size of block cipher"; | ||
324 | crypto_free_cipher(essiv_tfm); | ||
325 | return ERR_PTR(-EINVAL); | ||
326 | } | ||
327 | |||
328 | err = crypto_cipher_setkey(essiv_tfm, salt, saltsize); | ||
329 | if (err) { | ||
330 | ti->error = "Failed to set key for ESSIV cipher"; | ||
331 | crypto_free_cipher(essiv_tfm); | ||
332 | return ERR_PTR(err); | ||
333 | } | ||
334 | |||
335 | return essiv_tfm; | ||
221 | } | 336 | } |
222 | 337 | ||
223 | static void crypt_iv_essiv_dtr(struct crypt_config *cc) | 338 | static void crypt_iv_essiv_dtr(struct crypt_config *cc) |
224 | { | 339 | { |
340 | int cpu; | ||
341 | struct crypt_cpu *cpu_cc; | ||
342 | struct crypto_cipher *essiv_tfm; | ||
225 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; | 343 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; |
226 | 344 | ||
227 | crypto_free_cipher(essiv->tfm); | ||
228 | essiv->tfm = NULL; | ||
229 | |||
230 | crypto_free_hash(essiv->hash_tfm); | 345 | crypto_free_hash(essiv->hash_tfm); |
231 | essiv->hash_tfm = NULL; | 346 | essiv->hash_tfm = NULL; |
232 | 347 | ||
233 | kzfree(essiv->salt); | 348 | kzfree(essiv->salt); |
234 | essiv->salt = NULL; | 349 | essiv->salt = NULL; |
350 | |||
351 | for_each_possible_cpu(cpu) { | ||
352 | cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
353 | essiv_tfm = cpu_cc->iv_private; | ||
354 | |||
355 | if (essiv_tfm) | ||
356 | crypto_free_cipher(essiv_tfm); | ||
357 | |||
358 | cpu_cc->iv_private = NULL; | ||
359 | } | ||
235 | } | 360 | } |
236 | 361 | ||
237 | static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | 362 | static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, |
@@ -240,7 +365,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | |||
240 | struct crypto_cipher *essiv_tfm = NULL; | 365 | struct crypto_cipher *essiv_tfm = NULL; |
241 | struct crypto_hash *hash_tfm = NULL; | 366 | struct crypto_hash *hash_tfm = NULL; |
242 | u8 *salt = NULL; | 367 | u8 *salt = NULL; |
243 | int err; | 368 | int err, cpu; |
244 | 369 | ||
245 | if (!opts) { | 370 | if (!opts) { |
246 | ti->error = "Digest algorithm missing for ESSIV mode"; | 371 | ti->error = "Digest algorithm missing for ESSIV mode"; |
@@ -262,48 +387,44 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | |||
262 | goto bad; | 387 | goto bad; |
263 | } | 388 | } |
264 | 389 | ||
265 | /* Allocate essiv_tfm */ | ||
266 | essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); | ||
267 | if (IS_ERR(essiv_tfm)) { | ||
268 | ti->error = "Error allocating crypto tfm for ESSIV"; | ||
269 | err = PTR_ERR(essiv_tfm); | ||
270 | goto bad; | ||
271 | } | ||
272 | if (crypto_cipher_blocksize(essiv_tfm) != | ||
273 | crypto_ablkcipher_ivsize(cc->tfm)) { | ||
274 | ti->error = "Block size of ESSIV cipher does " | ||
275 | "not match IV size of block cipher"; | ||
276 | err = -EINVAL; | ||
277 | goto bad; | ||
278 | } | ||
279 | |||
280 | cc->iv_gen_private.essiv.salt = salt; | 390 | cc->iv_gen_private.essiv.salt = salt; |
281 | cc->iv_gen_private.essiv.tfm = essiv_tfm; | ||
282 | cc->iv_gen_private.essiv.hash_tfm = hash_tfm; | 391 | cc->iv_gen_private.essiv.hash_tfm = hash_tfm; |
283 | 392 | ||
393 | for_each_possible_cpu(cpu) { | ||
394 | essiv_tfm = setup_essiv_cpu(cc, ti, salt, | ||
395 | crypto_hash_digestsize(hash_tfm)); | ||
396 | if (IS_ERR(essiv_tfm)) { | ||
397 | crypt_iv_essiv_dtr(cc); | ||
398 | return PTR_ERR(essiv_tfm); | ||
399 | } | ||
400 | per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; | ||
401 | } | ||
402 | |||
284 | return 0; | 403 | return 0; |
285 | 404 | ||
286 | bad: | 405 | bad: |
287 | if (essiv_tfm && !IS_ERR(essiv_tfm)) | ||
288 | crypto_free_cipher(essiv_tfm); | ||
289 | if (hash_tfm && !IS_ERR(hash_tfm)) | 406 | if (hash_tfm && !IS_ERR(hash_tfm)) |
290 | crypto_free_hash(hash_tfm); | 407 | crypto_free_hash(hash_tfm); |
291 | kfree(salt); | 408 | kfree(salt); |
292 | return err; | 409 | return err; |
293 | } | 410 | } |
294 | 411 | ||
295 | static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | 412 | static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, |
413 | struct dm_crypt_request *dmreq) | ||
296 | { | 414 | { |
415 | struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; | ||
416 | |||
297 | memset(iv, 0, cc->iv_size); | 417 | memset(iv, 0, cc->iv_size); |
298 | *(u64 *)iv = cpu_to_le64(sector); | 418 | *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); |
299 | crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv); | 419 | crypto_cipher_encrypt_one(essiv_tfm, iv, iv); |
420 | |||
300 | return 0; | 421 | return 0; |
301 | } | 422 | } |
302 | 423 | ||
303 | static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, | 424 | static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, |
304 | const char *opts) | 425 | const char *opts) |
305 | { | 426 | { |
306 | unsigned bs = crypto_ablkcipher_blocksize(cc->tfm); | 427 | unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc)); |
307 | int log = ilog2(bs); | 428 | int log = ilog2(bs); |
308 | 429 | ||
309 | /* we need to calculate how far we must shift the sector count | 430 | /* we need to calculate how far we must shift the sector count |
@@ -328,25 +449,177 @@ static void crypt_iv_benbi_dtr(struct crypt_config *cc) | |||
328 | { | 449 | { |
329 | } | 450 | } |
330 | 451 | ||
331 | static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | 452 | static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, |
453 | struct dm_crypt_request *dmreq) | ||
332 | { | 454 | { |
333 | __be64 val; | 455 | __be64 val; |
334 | 456 | ||
335 | memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ | 457 | memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ |
336 | 458 | ||
337 | val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi.shift) + 1); | 459 | val = cpu_to_be64(((u64)dmreq->iv_sector << cc->iv_gen_private.benbi.shift) + 1); |
338 | put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); | 460 | put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); |
339 | 461 | ||
340 | return 0; | 462 | return 0; |
341 | } | 463 | } |
342 | 464 | ||
343 | static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | 465 | static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, |
466 | struct dm_crypt_request *dmreq) | ||
344 | { | 467 | { |
345 | memset(iv, 0, cc->iv_size); | 468 | memset(iv, 0, cc->iv_size); |
346 | 469 | ||
347 | return 0; | 470 | return 0; |
348 | } | 471 | } |
349 | 472 | ||
473 | static void crypt_iv_lmk_dtr(struct crypt_config *cc) | ||
474 | { | ||
475 | struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; | ||
476 | |||
477 | if (lmk->hash_tfm && !IS_ERR(lmk->hash_tfm)) | ||
478 | crypto_free_shash(lmk->hash_tfm); | ||
479 | lmk->hash_tfm = NULL; | ||
480 | |||
481 | kzfree(lmk->seed); | ||
482 | lmk->seed = NULL; | ||
483 | } | ||
484 | |||
485 | static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti, | ||
486 | const char *opts) | ||
487 | { | ||
488 | struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; | ||
489 | |||
490 | lmk->hash_tfm = crypto_alloc_shash("md5", 0, 0); | ||
491 | if (IS_ERR(lmk->hash_tfm)) { | ||
492 | ti->error = "Error initializing LMK hash"; | ||
493 | return PTR_ERR(lmk->hash_tfm); | ||
494 | } | ||
495 | |||
496 | /* No seed in LMK version 2 */ | ||
497 | if (cc->key_parts == cc->tfms_count) { | ||
498 | lmk->seed = NULL; | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | lmk->seed = kzalloc(LMK_SEED_SIZE, GFP_KERNEL); | ||
503 | if (!lmk->seed) { | ||
504 | crypt_iv_lmk_dtr(cc); | ||
505 | ti->error = "Error kmallocing seed storage in LMK"; | ||
506 | return -ENOMEM; | ||
507 | } | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | static int crypt_iv_lmk_init(struct crypt_config *cc) | ||
513 | { | ||
514 | struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; | ||
515 | int subkey_size = cc->key_size / cc->key_parts; | ||
516 | |||
517 | /* LMK seed is on the position of LMK_KEYS + 1 key */ | ||
518 | if (lmk->seed) | ||
519 | memcpy(lmk->seed, cc->key + (cc->tfms_count * subkey_size), | ||
520 | crypto_shash_digestsize(lmk->hash_tfm)); | ||
521 | |||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | static int crypt_iv_lmk_wipe(struct crypt_config *cc) | ||
526 | { | ||
527 | struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; | ||
528 | |||
529 | if (lmk->seed) | ||
530 | memset(lmk->seed, 0, LMK_SEED_SIZE); | ||
531 | |||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, | ||
536 | struct dm_crypt_request *dmreq, | ||
537 | u8 *data) | ||
538 | { | ||
539 | struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; | ||
540 | struct { | ||
541 | struct shash_desc desc; | ||
542 | char ctx[crypto_shash_descsize(lmk->hash_tfm)]; | ||
543 | } sdesc; | ||
544 | struct md5_state md5state; | ||
545 | u32 buf[4]; | ||
546 | int i, r; | ||
547 | |||
548 | sdesc.desc.tfm = lmk->hash_tfm; | ||
549 | sdesc.desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | ||
550 | |||
551 | r = crypto_shash_init(&sdesc.desc); | ||
552 | if (r) | ||
553 | return r; | ||
554 | |||
555 | if (lmk->seed) { | ||
556 | r = crypto_shash_update(&sdesc.desc, lmk->seed, LMK_SEED_SIZE); | ||
557 | if (r) | ||
558 | return r; | ||
559 | } | ||
560 | |||
561 | /* Sector is always 512B, block size 16, add data of blocks 1-31 */ | ||
562 | r = crypto_shash_update(&sdesc.desc, data + 16, 16 * 31); | ||
563 | if (r) | ||
564 | return r; | ||
565 | |||
566 | /* Sector is cropped to 56 bits here */ | ||
567 | buf[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF); | ||
568 | buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000); | ||
569 | buf[2] = cpu_to_le32(4024); | ||
570 | buf[3] = 0; | ||
571 | r = crypto_shash_update(&sdesc.desc, (u8 *)buf, sizeof(buf)); | ||
572 | if (r) | ||
573 | return r; | ||
574 | |||
575 | /* No MD5 padding here */ | ||
576 | r = crypto_shash_export(&sdesc.desc, &md5state); | ||
577 | if (r) | ||
578 | return r; | ||
579 | |||
580 | for (i = 0; i < MD5_HASH_WORDS; i++) | ||
581 | __cpu_to_le32s(&md5state.hash[i]); | ||
582 | memcpy(iv, &md5state.hash, cc->iv_size); | ||
583 | |||
584 | return 0; | ||
585 | } | ||
586 | |||
587 | static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, | ||
588 | struct dm_crypt_request *dmreq) | ||
589 | { | ||
590 | u8 *src; | ||
591 | int r = 0; | ||
592 | |||
593 | if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { | ||
594 | src = kmap_atomic(sg_page(&dmreq->sg_in), KM_USER0); | ||
595 | r = crypt_iv_lmk_one(cc, iv, dmreq, src + dmreq->sg_in.offset); | ||
596 | kunmap_atomic(src, KM_USER0); | ||
597 | } else | ||
598 | memset(iv, 0, cc->iv_size); | ||
599 | |||
600 | return r; | ||
601 | } | ||
602 | |||
603 | static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, | ||
604 | struct dm_crypt_request *dmreq) | ||
605 | { | ||
606 | u8 *dst; | ||
607 | int r; | ||
608 | |||
609 | if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) | ||
610 | return 0; | ||
611 | |||
612 | dst = kmap_atomic(sg_page(&dmreq->sg_out), KM_USER0); | ||
613 | r = crypt_iv_lmk_one(cc, iv, dmreq, dst + dmreq->sg_out.offset); | ||
614 | |||
615 | /* Tweak the first block of plaintext sector */ | ||
616 | if (!r) | ||
617 | crypto_xor(dst + dmreq->sg_out.offset, iv, cc->iv_size); | ||
618 | |||
619 | kunmap_atomic(dst, KM_USER0); | ||
620 | return r; | ||
621 | } | ||
622 | |||
350 | static struct crypt_iv_operations crypt_iv_plain_ops = { | 623 | static struct crypt_iv_operations crypt_iv_plain_ops = { |
351 | .generator = crypt_iv_plain_gen | 624 | .generator = crypt_iv_plain_gen |
352 | }; | 625 | }; |
@@ -373,6 +646,15 @@ static struct crypt_iv_operations crypt_iv_null_ops = { | |||
373 | .generator = crypt_iv_null_gen | 646 | .generator = crypt_iv_null_gen |
374 | }; | 647 | }; |
375 | 648 | ||
649 | static struct crypt_iv_operations crypt_iv_lmk_ops = { | ||
650 | .ctr = crypt_iv_lmk_ctr, | ||
651 | .dtr = crypt_iv_lmk_dtr, | ||
652 | .init = crypt_iv_lmk_init, | ||
653 | .wipe = crypt_iv_lmk_wipe, | ||
654 | .generator = crypt_iv_lmk_gen, | ||
655 | .post = crypt_iv_lmk_post | ||
656 | }; | ||
657 | |||
376 | static void crypt_convert_init(struct crypt_config *cc, | 658 | static void crypt_convert_init(struct crypt_config *cc, |
377 | struct convert_context *ctx, | 659 | struct convert_context *ctx, |
378 | struct bio *bio_out, struct bio *bio_in, | 660 | struct bio *bio_out, struct bio *bio_in, |
@@ -400,6 +682,13 @@ static struct ablkcipher_request *req_of_dmreq(struct crypt_config *cc, | |||
400 | return (struct ablkcipher_request *)((char *)dmreq - cc->dmreq_start); | 682 | return (struct ablkcipher_request *)((char *)dmreq - cc->dmreq_start); |
401 | } | 683 | } |
402 | 684 | ||
685 | static u8 *iv_of_dmreq(struct crypt_config *cc, | ||
686 | struct dm_crypt_request *dmreq) | ||
687 | { | ||
688 | return (u8 *)ALIGN((unsigned long)(dmreq + 1), | ||
689 | crypto_ablkcipher_alignmask(any_tfm(cc)) + 1); | ||
690 | } | ||
691 | |||
403 | static int crypt_convert_block(struct crypt_config *cc, | 692 | static int crypt_convert_block(struct crypt_config *cc, |
404 | struct convert_context *ctx, | 693 | struct convert_context *ctx, |
405 | struct ablkcipher_request *req) | 694 | struct ablkcipher_request *req) |
@@ -411,9 +700,9 @@ static int crypt_convert_block(struct crypt_config *cc, | |||
411 | int r = 0; | 700 | int r = 0; |
412 | 701 | ||
413 | dmreq = dmreq_of_req(cc, req); | 702 | dmreq = dmreq_of_req(cc, req); |
414 | iv = (u8 *)ALIGN((unsigned long)(dmreq + 1), | 703 | iv = iv_of_dmreq(cc, dmreq); |
415 | crypto_ablkcipher_alignmask(cc->tfm) + 1); | ||
416 | 704 | ||
705 | dmreq->iv_sector = ctx->sector; | ||
417 | dmreq->ctx = ctx; | 706 | dmreq->ctx = ctx; |
418 | sg_init_table(&dmreq->sg_in, 1); | 707 | sg_init_table(&dmreq->sg_in, 1); |
419 | sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, | 708 | sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, |
@@ -436,7 +725,7 @@ static int crypt_convert_block(struct crypt_config *cc, | |||
436 | } | 725 | } |
437 | 726 | ||
438 | if (cc->iv_gen_ops) { | 727 | if (cc->iv_gen_ops) { |
439 | r = cc->iv_gen_ops->generator(cc, iv, ctx->sector); | 728 | r = cc->iv_gen_ops->generator(cc, iv, dmreq); |
440 | if (r < 0) | 729 | if (r < 0) |
441 | return r; | 730 | return r; |
442 | } | 731 | } |
@@ -449,21 +738,28 @@ static int crypt_convert_block(struct crypt_config *cc, | |||
449 | else | 738 | else |
450 | r = crypto_ablkcipher_decrypt(req); | 739 | r = crypto_ablkcipher_decrypt(req); |
451 | 740 | ||
741 | if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) | ||
742 | r = cc->iv_gen_ops->post(cc, iv, dmreq); | ||
743 | |||
452 | return r; | 744 | return r; |
453 | } | 745 | } |
454 | 746 | ||
455 | static void kcryptd_async_done(struct crypto_async_request *async_req, | 747 | static void kcryptd_async_done(struct crypto_async_request *async_req, |
456 | int error); | 748 | int error); |
749 | |||
457 | static void crypt_alloc_req(struct crypt_config *cc, | 750 | static void crypt_alloc_req(struct crypt_config *cc, |
458 | struct convert_context *ctx) | 751 | struct convert_context *ctx) |
459 | { | 752 | { |
460 | if (!cc->req) | 753 | struct crypt_cpu *this_cc = this_crypt_config(cc); |
461 | cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); | 754 | unsigned key_index = ctx->sector & (cc->tfms_count - 1); |
462 | ablkcipher_request_set_tfm(cc->req, cc->tfm); | 755 | |
463 | ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | | 756 | if (!this_cc->req) |
464 | CRYPTO_TFM_REQ_MAY_SLEEP, | 757 | this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); |
465 | kcryptd_async_done, | 758 | |
466 | dmreq_of_req(cc, cc->req)); | 759 | ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); |
760 | ablkcipher_request_set_callback(this_cc->req, | ||
761 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
762 | kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); | ||
467 | } | 763 | } |
468 | 764 | ||
469 | /* | 765 | /* |
@@ -472,6 +768,7 @@ static void crypt_alloc_req(struct crypt_config *cc, | |||
472 | static int crypt_convert(struct crypt_config *cc, | 768 | static int crypt_convert(struct crypt_config *cc, |
473 | struct convert_context *ctx) | 769 | struct convert_context *ctx) |
474 | { | 770 | { |
771 | struct crypt_cpu *this_cc = this_crypt_config(cc); | ||
475 | int r; | 772 | int r; |
476 | 773 | ||
477 | atomic_set(&ctx->pending, 1); | 774 | atomic_set(&ctx->pending, 1); |
@@ -483,7 +780,7 @@ static int crypt_convert(struct crypt_config *cc, | |||
483 | 780 | ||
484 | atomic_inc(&ctx->pending); | 781 | atomic_inc(&ctx->pending); |
485 | 782 | ||
486 | r = crypt_convert_block(cc, ctx, cc->req); | 783 | r = crypt_convert_block(cc, ctx, this_cc->req); |
487 | 784 | ||
488 | switch (r) { | 785 | switch (r) { |
489 | /* async */ | 786 | /* async */ |
@@ -492,7 +789,7 @@ static int crypt_convert(struct crypt_config *cc, | |||
492 | INIT_COMPLETION(ctx->restart); | 789 | INIT_COMPLETION(ctx->restart); |
493 | /* fall through*/ | 790 | /* fall through*/ |
494 | case -EINPROGRESS: | 791 | case -EINPROGRESS: |
495 | cc->req = NULL; | 792 | this_cc->req = NULL; |
496 | ctx->sector++; | 793 | ctx->sector++; |
497 | continue; | 794 | continue; |
498 | 795 | ||
@@ -651,6 +948,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io) | |||
651 | * They must be separated as otherwise the final stages could be | 948 | * They must be separated as otherwise the final stages could be |
652 | * starved by new requests which can block in the first stages due | 949 | * starved by new requests which can block in the first stages due |
653 | * to memory allocation. | 950 | * to memory allocation. |
951 | * | ||
952 | * The work is done per CPU global for all dm-crypt instances. | ||
953 | * They should not depend on each other and do not block. | ||
654 | */ | 954 | */ |
655 | static void crypt_endio(struct bio *clone, int error) | 955 | static void crypt_endio(struct bio *clone, int error) |
656 | { | 956 | { |
@@ -691,26 +991,30 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) | |||
691 | clone->bi_destructor = dm_crypt_bio_destructor; | 991 | clone->bi_destructor = dm_crypt_bio_destructor; |
692 | } | 992 | } |
693 | 993 | ||
694 | static void kcryptd_io_read(struct dm_crypt_io *io) | 994 | static void kcryptd_unplug(struct crypt_config *cc) |
995 | { | ||
996 | blk_unplug(bdev_get_queue(cc->dev->bdev)); | ||
997 | } | ||
998 | |||
999 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | ||
695 | { | 1000 | { |
696 | struct crypt_config *cc = io->target->private; | 1001 | struct crypt_config *cc = io->target->private; |
697 | struct bio *base_bio = io->base_bio; | 1002 | struct bio *base_bio = io->base_bio; |
698 | struct bio *clone; | 1003 | struct bio *clone; |
699 | 1004 | ||
700 | crypt_inc_pending(io); | ||
701 | |||
702 | /* | 1005 | /* |
703 | * The block layer might modify the bvec array, so always | 1006 | * The block layer might modify the bvec array, so always |
704 | * copy the required bvecs because we need the original | 1007 | * copy the required bvecs because we need the original |
705 | * one in order to decrypt the whole bio data *afterwards*. | 1008 | * one in order to decrypt the whole bio data *afterwards*. |
706 | */ | 1009 | */ |
707 | clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs); | 1010 | clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); |
708 | if (unlikely(!clone)) { | 1011 | if (!clone) { |
709 | io->error = -ENOMEM; | 1012 | kcryptd_unplug(cc); |
710 | crypt_dec_pending(io); | 1013 | return 1; |
711 | return; | ||
712 | } | 1014 | } |
713 | 1015 | ||
1016 | crypt_inc_pending(io); | ||
1017 | |||
714 | clone_init(io, clone); | 1018 | clone_init(io, clone); |
715 | clone->bi_idx = 0; | 1019 | clone->bi_idx = 0; |
716 | clone->bi_vcnt = bio_segments(base_bio); | 1020 | clone->bi_vcnt = bio_segments(base_bio); |
@@ -720,6 +1024,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io) | |||
720 | sizeof(struct bio_vec) * clone->bi_vcnt); | 1024 | sizeof(struct bio_vec) * clone->bi_vcnt); |
721 | 1025 | ||
722 | generic_make_request(clone); | 1026 | generic_make_request(clone); |
1027 | return 0; | ||
723 | } | 1028 | } |
724 | 1029 | ||
725 | static void kcryptd_io_write(struct dm_crypt_io *io) | 1030 | static void kcryptd_io_write(struct dm_crypt_io *io) |
@@ -732,9 +1037,12 @@ static void kcryptd_io(struct work_struct *work) | |||
732 | { | 1037 | { |
733 | struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); | 1038 | struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); |
734 | 1039 | ||
735 | if (bio_data_dir(io->base_bio) == READ) | 1040 | if (bio_data_dir(io->base_bio) == READ) { |
736 | kcryptd_io_read(io); | 1041 | crypt_inc_pending(io); |
737 | else | 1042 | if (kcryptd_io_read(io, GFP_NOIO)) |
1043 | io->error = -ENOMEM; | ||
1044 | crypt_dec_pending(io); | ||
1045 | } else | ||
738 | kcryptd_io_write(io); | 1046 | kcryptd_io_write(io); |
739 | } | 1047 | } |
740 | 1048 | ||
@@ -901,6 +1209,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
901 | return; | 1209 | return; |
902 | } | 1210 | } |
903 | 1211 | ||
1212 | if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) | ||
1213 | error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); | ||
1214 | |||
904 | mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); | 1215 | mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); |
905 | 1216 | ||
906 | if (!atomic_dec_and_test(&ctx->pending)) | 1217 | if (!atomic_dec_and_test(&ctx->pending)) |
@@ -971,34 +1282,84 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) | |||
971 | } | 1282 | } |
972 | } | 1283 | } |
973 | 1284 | ||
974 | static int crypt_set_key(struct crypt_config *cc, char *key) | 1285 | static void crypt_free_tfms(struct crypt_config *cc, int cpu) |
975 | { | 1286 | { |
976 | unsigned key_size = strlen(key) >> 1; | 1287 | struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); |
1288 | unsigned i; | ||
977 | 1289 | ||
978 | if (cc->key_size && cc->key_size != key_size) | 1290 | for (i = 0; i < cc->tfms_count; i++) |
1291 | if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { | ||
1292 | crypto_free_ablkcipher(cpu_cc->tfms[i]); | ||
1293 | cpu_cc->tfms[i] = NULL; | ||
1294 | } | ||
1295 | } | ||
1296 | |||
1297 | static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) | ||
1298 | { | ||
1299 | struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
1300 | unsigned i; | ||
1301 | int err; | ||
1302 | |||
1303 | for (i = 0; i < cc->tfms_count; i++) { | ||
1304 | cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); | ||
1305 | if (IS_ERR(cpu_cc->tfms[i])) { | ||
1306 | err = PTR_ERR(cpu_cc->tfms[i]); | ||
1307 | crypt_free_tfms(cc, cpu); | ||
1308 | return err; | ||
1309 | } | ||
1310 | } | ||
1311 | |||
1312 | return 0; | ||
1313 | } | ||
1314 | |||
1315 | static int crypt_setkey_allcpus(struct crypt_config *cc) | ||
1316 | { | ||
1317 | unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); | ||
1318 | int cpu, err = 0, i, r; | ||
1319 | |||
1320 | for_each_possible_cpu(cpu) { | ||
1321 | for (i = 0; i < cc->tfms_count; i++) { | ||
1322 | r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], | ||
1323 | cc->key + (i * subkey_size), subkey_size); | ||
1324 | if (r) | ||
1325 | err = r; | ||
1326 | } | ||
1327 | } | ||
1328 | |||
1329 | return err; | ||
1330 | } | ||
1331 | |||
1332 | static int crypt_set_key(struct crypt_config *cc, char *key) | ||
1333 | { | ||
1334 | /* The key size may not be changed. */ | ||
1335 | if (cc->key_size != (strlen(key) >> 1)) | ||
979 | return -EINVAL; | 1336 | return -EINVAL; |
980 | 1337 | ||
981 | cc->key_size = key_size; /* initial settings */ | 1338 | /* Hyphen (which gives a key_size of zero) means there is no key. */ |
1339 | if (!cc->key_size && strcmp(key, "-")) | ||
1340 | return -EINVAL; | ||
982 | 1341 | ||
983 | if ((!key_size && strcmp(key, "-")) || | 1342 | if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) |
984 | (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) | ||
985 | return -EINVAL; | 1343 | return -EINVAL; |
986 | 1344 | ||
987 | set_bit(DM_CRYPT_KEY_VALID, &cc->flags); | 1345 | set_bit(DM_CRYPT_KEY_VALID, &cc->flags); |
988 | 1346 | ||
989 | return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size); | 1347 | return crypt_setkey_allcpus(cc); |
990 | } | 1348 | } |
991 | 1349 | ||
992 | static int crypt_wipe_key(struct crypt_config *cc) | 1350 | static int crypt_wipe_key(struct crypt_config *cc) |
993 | { | 1351 | { |
994 | clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); | 1352 | clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); |
995 | memset(&cc->key, 0, cc->key_size * sizeof(u8)); | 1353 | memset(&cc->key, 0, cc->key_size * sizeof(u8)); |
996 | return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size); | 1354 | |
1355 | return crypt_setkey_allcpus(cc); | ||
997 | } | 1356 | } |
998 | 1357 | ||
999 | static void crypt_dtr(struct dm_target *ti) | 1358 | static void crypt_dtr(struct dm_target *ti) |
1000 | { | 1359 | { |
1001 | struct crypt_config *cc = ti->private; | 1360 | struct crypt_config *cc = ti->private; |
1361 | struct crypt_cpu *cpu_cc; | ||
1362 | int cpu; | ||
1002 | 1363 | ||
1003 | ti->private = NULL; | 1364 | ti->private = NULL; |
1004 | 1365 | ||
@@ -1010,6 +1371,14 @@ static void crypt_dtr(struct dm_target *ti) | |||
1010 | if (cc->crypt_queue) | 1371 | if (cc->crypt_queue) |
1011 | destroy_workqueue(cc->crypt_queue); | 1372 | destroy_workqueue(cc->crypt_queue); |
1012 | 1373 | ||
1374 | if (cc->cpu) | ||
1375 | for_each_possible_cpu(cpu) { | ||
1376 | cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
1377 | if (cpu_cc->req) | ||
1378 | mempool_free(cpu_cc->req, cc->req_pool); | ||
1379 | crypt_free_tfms(cc, cpu); | ||
1380 | } | ||
1381 | |||
1013 | if (cc->bs) | 1382 | if (cc->bs) |
1014 | bioset_free(cc->bs); | 1383 | bioset_free(cc->bs); |
1015 | 1384 | ||
@@ -1023,14 +1392,14 @@ static void crypt_dtr(struct dm_target *ti) | |||
1023 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) | 1392 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) |
1024 | cc->iv_gen_ops->dtr(cc); | 1393 | cc->iv_gen_ops->dtr(cc); |
1025 | 1394 | ||
1026 | if (cc->tfm && !IS_ERR(cc->tfm)) | ||
1027 | crypto_free_ablkcipher(cc->tfm); | ||
1028 | |||
1029 | if (cc->dev) | 1395 | if (cc->dev) |
1030 | dm_put_device(ti, cc->dev); | 1396 | dm_put_device(ti, cc->dev); |
1031 | 1397 | ||
1398 | if (cc->cpu) | ||
1399 | free_percpu(cc->cpu); | ||
1400 | |||
1032 | kzfree(cc->cipher); | 1401 | kzfree(cc->cipher); |
1033 | kzfree(cc->cipher_mode); | 1402 | kzfree(cc->cipher_string); |
1034 | 1403 | ||
1035 | /* Must zero key material before freeing */ | 1404 | /* Must zero key material before freeing */ |
1036 | kzfree(cc); | 1405 | kzfree(cc); |
@@ -1040,9 +1409,9 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1040 | char *cipher_in, char *key) | 1409 | char *cipher_in, char *key) |
1041 | { | 1410 | { |
1042 | struct crypt_config *cc = ti->private; | 1411 | struct crypt_config *cc = ti->private; |
1043 | char *tmp, *cipher, *chainmode, *ivmode, *ivopts; | 1412 | char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; |
1044 | char *cipher_api = NULL; | 1413 | char *cipher_api = NULL; |
1045 | int ret = -EINVAL; | 1414 | int cpu, ret = -EINVAL; |
1046 | 1415 | ||
1047 | /* Convert to crypto api definition? */ | 1416 | /* Convert to crypto api definition? */ |
1048 | if (strchr(cipher_in, '(')) { | 1417 | if (strchr(cipher_in, '(')) { |
@@ -1050,23 +1419,31 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1050 | return -EINVAL; | 1419 | return -EINVAL; |
1051 | } | 1420 | } |
1052 | 1421 | ||
1422 | cc->cipher_string = kstrdup(cipher_in, GFP_KERNEL); | ||
1423 | if (!cc->cipher_string) | ||
1424 | goto bad_mem; | ||
1425 | |||
1053 | /* | 1426 | /* |
1054 | * Legacy dm-crypt cipher specification | 1427 | * Legacy dm-crypt cipher specification |
1055 | * cipher-mode-iv:ivopts | 1428 | * cipher[:keycount]-mode-iv:ivopts |
1056 | */ | 1429 | */ |
1057 | tmp = cipher_in; | 1430 | tmp = cipher_in; |
1058 | cipher = strsep(&tmp, "-"); | 1431 | keycount = strsep(&tmp, "-"); |
1432 | cipher = strsep(&keycount, ":"); | ||
1433 | |||
1434 | if (!keycount) | ||
1435 | cc->tfms_count = 1; | ||
1436 | else if (sscanf(keycount, "%u", &cc->tfms_count) != 1 || | ||
1437 | !is_power_of_2(cc->tfms_count)) { | ||
1438 | ti->error = "Bad cipher key count specification"; | ||
1439 | return -EINVAL; | ||
1440 | } | ||
1441 | cc->key_parts = cc->tfms_count; | ||
1059 | 1442 | ||
1060 | cc->cipher = kstrdup(cipher, GFP_KERNEL); | 1443 | cc->cipher = kstrdup(cipher, GFP_KERNEL); |
1061 | if (!cc->cipher) | 1444 | if (!cc->cipher) |
1062 | goto bad_mem; | 1445 | goto bad_mem; |
1063 | 1446 | ||
1064 | if (tmp) { | ||
1065 | cc->cipher_mode = kstrdup(tmp, GFP_KERNEL); | ||
1066 | if (!cc->cipher_mode) | ||
1067 | goto bad_mem; | ||
1068 | } | ||
1069 | |||
1070 | chainmode = strsep(&tmp, "-"); | 1447 | chainmode = strsep(&tmp, "-"); |
1071 | ivopts = strsep(&tmp, "-"); | 1448 | ivopts = strsep(&tmp, "-"); |
1072 | ivmode = strsep(&ivopts, ":"); | 1449 | ivmode = strsep(&ivopts, ":"); |
@@ -1074,10 +1451,19 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1074 | if (tmp) | 1451 | if (tmp) |
1075 | DMWARN("Ignoring unexpected additional cipher options"); | 1452 | DMWARN("Ignoring unexpected additional cipher options"); |
1076 | 1453 | ||
1077 | /* Compatibility mode for old dm-crypt mappings */ | 1454 | cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + |
1455 | cc->tfms_count * sizeof(*(cc->cpu->tfms)), | ||
1456 | __alignof__(struct crypt_cpu)); | ||
1457 | if (!cc->cpu) { | ||
1458 | ti->error = "Cannot allocate per cpu state"; | ||
1459 | goto bad_mem; | ||
1460 | } | ||
1461 | |||
1462 | /* | ||
1463 | * For compatibility with the original dm-crypt mapping format, if | ||
1464 | * only the cipher name is supplied, use cbc-plain. | ||
1465 | */ | ||
1078 | if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) { | 1466 | if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) { |
1079 | kfree(cc->cipher_mode); | ||
1080 | cc->cipher_mode = kstrdup("cbc-plain", GFP_KERNEL); | ||
1081 | chainmode = "cbc"; | 1467 | chainmode = "cbc"; |
1082 | ivmode = "plain"; | 1468 | ivmode = "plain"; |
1083 | } | 1469 | } |
@@ -1099,11 +1485,12 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1099 | } | 1485 | } |
1100 | 1486 | ||
1101 | /* Allocate cipher */ | 1487 | /* Allocate cipher */ |
1102 | cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0); | 1488 | for_each_possible_cpu(cpu) { |
1103 | if (IS_ERR(cc->tfm)) { | 1489 | ret = crypt_alloc_tfms(cc, cpu, cipher_api); |
1104 | ret = PTR_ERR(cc->tfm); | 1490 | if (ret < 0) { |
1105 | ti->error = "Error allocating crypto tfm"; | 1491 | ti->error = "Error allocating crypto tfm"; |
1106 | goto bad; | 1492 | goto bad; |
1493 | } | ||
1107 | } | 1494 | } |
1108 | 1495 | ||
1109 | /* Initialize and set key */ | 1496 | /* Initialize and set key */ |
@@ -1114,7 +1501,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1114 | } | 1501 | } |
1115 | 1502 | ||
1116 | /* Initialize IV */ | 1503 | /* Initialize IV */ |
1117 | cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm); | 1504 | cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); |
1118 | if (cc->iv_size) | 1505 | if (cc->iv_size) |
1119 | /* at least a 64 bit sector number should fit in our buffer */ | 1506 | /* at least a 64 bit sector number should fit in our buffer */ |
1120 | cc->iv_size = max(cc->iv_size, | 1507 | cc->iv_size = max(cc->iv_size, |
@@ -1137,7 +1524,15 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1137 | cc->iv_gen_ops = &crypt_iv_benbi_ops; | 1524 | cc->iv_gen_ops = &crypt_iv_benbi_ops; |
1138 | else if (strcmp(ivmode, "null") == 0) | 1525 | else if (strcmp(ivmode, "null") == 0) |
1139 | cc->iv_gen_ops = &crypt_iv_null_ops; | 1526 | cc->iv_gen_ops = &crypt_iv_null_ops; |
1140 | else { | 1527 | else if (strcmp(ivmode, "lmk") == 0) { |
1528 | cc->iv_gen_ops = &crypt_iv_lmk_ops; | ||
1529 | /* Version 2 and 3 is recognised according | ||
1530 | * to length of provided multi-key string. | ||
1531 | * If present (version 3), last key is used as IV seed. | ||
1532 | */ | ||
1533 | if (cc->key_size % cc->key_parts) | ||
1534 | cc->key_parts++; | ||
1535 | } else { | ||
1141 | ret = -EINVAL; | 1536 | ret = -EINVAL; |
1142 | ti->error = "Invalid IV mode"; | 1537 | ti->error = "Invalid IV mode"; |
1143 | goto bad; | 1538 | goto bad; |
@@ -1194,6 +1589,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1194 | ti->error = "Cannot allocate encryption context"; | 1589 | ti->error = "Cannot allocate encryption context"; |
1195 | return -ENOMEM; | 1590 | return -ENOMEM; |
1196 | } | 1591 | } |
1592 | cc->key_size = key_size; | ||
1197 | 1593 | ||
1198 | ti->private = cc; | 1594 | ti->private = cc; |
1199 | ret = crypt_ctr_cipher(ti, argv[0], argv[1]); | 1595 | ret = crypt_ctr_cipher(ti, argv[0], argv[1]); |
@@ -1208,9 +1604,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1208 | } | 1604 | } |
1209 | 1605 | ||
1210 | cc->dmreq_start = sizeof(struct ablkcipher_request); | 1606 | cc->dmreq_start = sizeof(struct ablkcipher_request); |
1211 | cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm); | 1607 | cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); |
1212 | cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); | 1608 | cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); |
1213 | cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) & | 1609 | cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) & |
1214 | ~(crypto_tfm_ctx_alignment() - 1); | 1610 | ~(crypto_tfm_ctx_alignment() - 1); |
1215 | 1611 | ||
1216 | cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + | 1612 | cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + |
@@ -1219,7 +1615,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1219 | ti->error = "Cannot allocate crypt request mempool"; | 1615 | ti->error = "Cannot allocate crypt request mempool"; |
1220 | goto bad; | 1616 | goto bad; |
1221 | } | 1617 | } |
1222 | cc->req = NULL; | ||
1223 | 1618 | ||
1224 | cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); | 1619 | cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); |
1225 | if (!cc->page_pool) { | 1620 | if (!cc->page_pool) { |
@@ -1252,13 +1647,20 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1252 | cc->start = tmpll; | 1647 | cc->start = tmpll; |
1253 | 1648 | ||
1254 | ret = -ENOMEM; | 1649 | ret = -ENOMEM; |
1255 | cc->io_queue = create_singlethread_workqueue("kcryptd_io"); | 1650 | cc->io_queue = alloc_workqueue("kcryptd_io", |
1651 | WQ_NON_REENTRANT| | ||
1652 | WQ_MEM_RECLAIM, | ||
1653 | 1); | ||
1256 | if (!cc->io_queue) { | 1654 | if (!cc->io_queue) { |
1257 | ti->error = "Couldn't create kcryptd io queue"; | 1655 | ti->error = "Couldn't create kcryptd io queue"; |
1258 | goto bad; | 1656 | goto bad; |
1259 | } | 1657 | } |
1260 | 1658 | ||
1261 | cc->crypt_queue = create_singlethread_workqueue("kcryptd"); | 1659 | cc->crypt_queue = alloc_workqueue("kcryptd", |
1660 | WQ_NON_REENTRANT| | ||
1661 | WQ_CPU_INTENSIVE| | ||
1662 | WQ_MEM_RECLAIM, | ||
1663 | 1); | ||
1262 | if (!cc->crypt_queue) { | 1664 | if (!cc->crypt_queue) { |
1263 | ti->error = "Couldn't create kcryptd queue"; | 1665 | ti->error = "Couldn't create kcryptd queue"; |
1264 | goto bad; | 1666 | goto bad; |
@@ -1286,9 +1688,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1286 | 1688 | ||
1287 | io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); | 1689 | io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); |
1288 | 1690 | ||
1289 | if (bio_data_dir(io->base_bio) == READ) | 1691 | if (bio_data_dir(io->base_bio) == READ) { |
1290 | kcryptd_queue_io(io); | 1692 | if (kcryptd_io_read(io, GFP_NOWAIT)) |
1291 | else | 1693 | kcryptd_queue_io(io); |
1694 | } else | ||
1292 | kcryptd_queue_crypt(io); | 1695 | kcryptd_queue_crypt(io); |
1293 | 1696 | ||
1294 | return DM_MAPIO_SUBMITTED; | 1697 | return DM_MAPIO_SUBMITTED; |
@@ -1306,10 +1709,7 @@ static int crypt_status(struct dm_target *ti, status_type_t type, | |||
1306 | break; | 1709 | break; |
1307 | 1710 | ||
1308 | case STATUSTYPE_TABLE: | 1711 | case STATUSTYPE_TABLE: |
1309 | if (cc->cipher_mode) | 1712 | DMEMIT("%s ", cc->cipher_string); |
1310 | DMEMIT("%s-%s ", cc->cipher, cc->cipher_mode); | ||
1311 | else | ||
1312 | DMEMIT("%s ", cc->cipher); | ||
1313 | 1713 | ||
1314 | if (cc->key_size > 0) { | 1714 | if (cc->key_size > 0) { |
1315 | if ((maxlen - sz) < ((cc->key_size << 1) + 1)) | 1715 | if ((maxlen - sz) < ((cc->key_size << 1) + 1)) |
@@ -1421,7 +1821,7 @@ static int crypt_iterate_devices(struct dm_target *ti, | |||
1421 | 1821 | ||
1422 | static struct target_type crypt_target = { | 1822 | static struct target_type crypt_target = { |
1423 | .name = "crypt", | 1823 | .name = "crypt", |
1424 | .version = {1, 7, 0}, | 1824 | .version = {1, 10, 0}, |
1425 | .module = THIS_MODULE, | 1825 | .module = THIS_MODULE, |
1426 | .ctr = crypt_ctr, | 1826 | .ctr = crypt_ctr, |
1427 | .dtr = crypt_dtr, | 1827 | .dtr = crypt_dtr, |
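The dm-crypt changes above extend the mapping's cipher specification to cipher[:keycount]-mode-iv:ivopts and require keycount to be a power of two, so crypt_alloc_req() can pick the per-request key with a simple mask: key_index = ctx->sector & (cc->tfms_count - 1). The following standalone sketch only mirrors that selection expression; the 64-key count and the sample sector numbers are illustrative assumptions, not values from the patch.

/* Sketch of the key_index selection used by the multi-key support above.
 * The key count (64, as in an assumed "aes:64-cbc-lmk" mapping) and the
 * sample sectors are assumptions for illustration.
 */
#include <stdio.h>

typedef unsigned long long sector_t;	/* stand-in for the kernel type */

int main(void)
{
	unsigned int tfms_count = 64;	/* must be a power of two */
	sector_t sectors[] = { 0, 1, 63, 64, 65, 1000 };
	unsigned int i;

	for (i = 0; i < sizeof(sectors) / sizeof(sectors[0]); i++) {
		/* same expression as in crypt_alloc_req() */
		unsigned int key_index = sectors[i] & (tfms_count - 1);

		printf("sector %llu -> key %u\n", sectors[i], key_index);
	}

	return 0;
}

Because the constructor rejects key counts that are not powers of two (the is_power_of_2() check), the mask is equivalent to sector % tfms_count without a division.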
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index baa11912cc94..f18375dcedd9 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -352,7 +352,7 @@ static int __init dm_delay_init(void) | |||
352 | { | 352 | { |
353 | int r = -ENOMEM; | 353 | int r = -ENOMEM; |
354 | 354 | ||
355 | kdelayd_wq = create_workqueue("kdelayd"); | 355 | kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); |
356 | if (!kdelayd_wq) { | 356 | if (!kdelayd_wq) { |
357 | DMERR("Couldn't start kdelayd"); | 357 | DMERR("Couldn't start kdelayd"); |
358 | goto bad_queue; | 358 | goto bad_queue; |
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 4b54618b4159..6d12775a1061 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -295,19 +295,55 @@ retry: | |||
295 | DMWARN("remove_all left %d open device(s)", dev_skipped); | 295 | DMWARN("remove_all left %d open device(s)", dev_skipped); |
296 | } | 296 | } |
297 | 297 | ||
298 | /* | ||
299 | * Set the uuid of a hash_cell that isn't already set. | ||
300 | */ | ||
301 | static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid) | ||
302 | { | ||
303 | mutex_lock(&dm_hash_cells_mutex); | ||
304 | hc->uuid = new_uuid; | ||
305 | mutex_unlock(&dm_hash_cells_mutex); | ||
306 | |||
307 | list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid)); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Changes the name of a hash_cell and returns the old name for | ||
312 | * the caller to free. | ||
313 | */ | ||
314 | static char *__change_cell_name(struct hash_cell *hc, char *new_name) | ||
315 | { | ||
316 | char *old_name; | ||
317 | |||
318 | /* | ||
319 | * Rename and move the name cell. | ||
320 | */ | ||
321 | list_del(&hc->name_list); | ||
322 | old_name = hc->name; | ||
323 | |||
324 | mutex_lock(&dm_hash_cells_mutex); | ||
325 | hc->name = new_name; | ||
326 | mutex_unlock(&dm_hash_cells_mutex); | ||
327 | |||
328 | list_add(&hc->name_list, _name_buckets + hash_str(new_name)); | ||
329 | |||
330 | return old_name; | ||
331 | } | ||
332 | |||
298 | static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, | 333 | static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, |
299 | const char *new) | 334 | const char *new) |
300 | { | 335 | { |
301 | char *new_name, *old_name; | 336 | char *new_data, *old_name = NULL; |
302 | struct hash_cell *hc; | 337 | struct hash_cell *hc; |
303 | struct dm_table *table; | 338 | struct dm_table *table; |
304 | struct mapped_device *md; | 339 | struct mapped_device *md; |
340 | unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; | ||
305 | 341 | ||
306 | /* | 342 | /* |
307 | * duplicate new. | 343 | * duplicate new. |
308 | */ | 344 | */ |
309 | new_name = kstrdup(new, GFP_KERNEL); | 345 | new_data = kstrdup(new, GFP_KERNEL); |
310 | if (!new_name) | 346 | if (!new_data) |
311 | return ERR_PTR(-ENOMEM); | 347 | return ERR_PTR(-ENOMEM); |
312 | 348 | ||
313 | down_write(&_hash_lock); | 349 | down_write(&_hash_lock); |
@@ -315,13 +351,19 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, | |||
315 | /* | 351 | /* |
316 | * Is new free ? | 352 | * Is new free ? |
317 | */ | 353 | */ |
318 | hc = __get_name_cell(new); | 354 | if (change_uuid) |
355 | hc = __get_uuid_cell(new); | ||
356 | else | ||
357 | hc = __get_name_cell(new); | ||
358 | |||
319 | if (hc) { | 359 | if (hc) { |
320 | DMWARN("asked to rename to an already-existing name %s -> %s", | 360 | DMWARN("Unable to change %s on mapped device %s to one that " |
361 | "already exists: %s", | ||
362 | change_uuid ? "uuid" : "name", | ||
321 | param->name, new); | 363 | param->name, new); |
322 | dm_put(hc->md); | 364 | dm_put(hc->md); |
323 | up_write(&_hash_lock); | 365 | up_write(&_hash_lock); |
324 | kfree(new_name); | 366 | kfree(new_data); |
325 | return ERR_PTR(-EBUSY); | 367 | return ERR_PTR(-EBUSY); |
326 | } | 368 | } |
327 | 369 | ||
@@ -330,22 +372,30 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, | |||
330 | */ | 372 | */ |
331 | hc = __get_name_cell(param->name); | 373 | hc = __get_name_cell(param->name); |
332 | if (!hc) { | 374 | if (!hc) { |
333 | DMWARN("asked to rename a non-existent device %s -> %s", | 375 | DMWARN("Unable to rename non-existent device, %s to %s%s", |
334 | param->name, new); | 376 | param->name, change_uuid ? "uuid " : "", new); |
335 | up_write(&_hash_lock); | 377 | up_write(&_hash_lock); |
336 | kfree(new_name); | 378 | kfree(new_data); |
337 | return ERR_PTR(-ENXIO); | 379 | return ERR_PTR(-ENXIO); |
338 | } | 380 | } |
339 | 381 | ||
340 | /* | 382 | /* |
341 | * rename and move the name cell. | 383 | * Does this device already have a uuid? |
342 | */ | 384 | */ |
343 | list_del(&hc->name_list); | 385 | if (change_uuid && hc->uuid) { |
344 | old_name = hc->name; | 386 | DMWARN("Unable to change uuid of mapped device %s to %s " |
345 | mutex_lock(&dm_hash_cells_mutex); | 387 | "because uuid is already set to %s", |
346 | hc->name = new_name; | 388 | param->name, new, hc->uuid); |
347 | mutex_unlock(&dm_hash_cells_mutex); | 389 | dm_put(hc->md); |
348 | list_add(&hc->name_list, _name_buckets + hash_str(new_name)); | 390 | up_write(&_hash_lock); |
391 | kfree(new_data); | ||
392 | return ERR_PTR(-EINVAL); | ||
393 | } | ||
394 | |||
395 | if (change_uuid) | ||
396 | __set_cell_uuid(hc, new_data); | ||
397 | else | ||
398 | old_name = __change_cell_name(hc, new_data); | ||
349 | 399 | ||
350 | /* | 400 | /* |
351 | * Wake up any dm event waiters. | 401 | * Wake up any dm event waiters. |
@@ -729,7 +779,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) | |||
729 | hc = __find_device_hash_cell(param); | 779 | hc = __find_device_hash_cell(param); |
730 | 780 | ||
731 | if (!hc) { | 781 | if (!hc) { |
732 | DMWARN("device doesn't appear to be in the dev hash table."); | 782 | DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); |
733 | up_write(&_hash_lock); | 783 | up_write(&_hash_lock); |
734 | return -ENXIO; | 784 | return -ENXIO; |
735 | } | 785 | } |
@@ -741,7 +791,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) | |||
741 | */ | 791 | */ |
742 | r = dm_lock_for_deletion(md); | 792 | r = dm_lock_for_deletion(md); |
743 | if (r) { | 793 | if (r) { |
744 | DMWARN("unable to remove open device %s", hc->name); | 794 | DMDEBUG_LIMIT("unable to remove open device %s", hc->name); |
745 | up_write(&_hash_lock); | 795 | up_write(&_hash_lock); |
746 | dm_put(md); | 796 | dm_put(md); |
747 | return r; | 797 | return r; |
@@ -774,21 +824,24 @@ static int invalid_str(char *str, void *end) | |||
774 | static int dev_rename(struct dm_ioctl *param, size_t param_size) | 824 | static int dev_rename(struct dm_ioctl *param, size_t param_size) |
775 | { | 825 | { |
776 | int r; | 826 | int r; |
777 | char *new_name = (char *) param + param->data_start; | 827 | char *new_data = (char *) param + param->data_start; |
778 | struct mapped_device *md; | 828 | struct mapped_device *md; |
829 | unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; | ||
779 | 830 | ||
780 | if (new_name < param->data || | 831 | if (new_data < param->data || |
781 | invalid_str(new_name, (void *) param + param_size) || | 832 | invalid_str(new_data, (void *) param + param_size) || |
782 | strlen(new_name) > DM_NAME_LEN - 1) { | 833 | strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) { |
783 | DMWARN("Invalid new logical volume name supplied."); | 834 | DMWARN("Invalid new mapped device name or uuid string supplied."); |
784 | return -EINVAL; | 835 | return -EINVAL; |
785 | } | 836 | } |
786 | 837 | ||
787 | r = check_name(new_name); | 838 | if (!change_uuid) { |
788 | if (r) | 839 | r = check_name(new_data); |
789 | return r; | 840 | if (r) |
841 | return r; | ||
842 | } | ||
790 | 843 | ||
791 | md = dm_hash_rename(param, new_name); | 844 | md = dm_hash_rename(param, new_data); |
792 | if (IS_ERR(md)) | 845 | if (IS_ERR(md)) |
793 | return PTR_ERR(md); | 846 | return PTR_ERR(md); |
794 | 847 | ||
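[Note] With DM_UUID_FLAG set, DM_DEV_RENAME now doubles as a one-shot "set uuid" operation: the payload is validated against DM_UUID_LEN rather than DM_NAME_LEN, check_name() is skipped, and dm_hash_rename() refuses with -EINVAL if the device already has a uuid. A hedged userspace sketch follows, assuming the libdevmapper helper dm_task_set_newuuid() that accompanied this kernel change; if your libdevmapper predates it, only plain renames are available:

	#include <libdevmapper.h>

	/* Sketch: give an existing, uuid-less mapped device a uuid. */
	static int set_device_uuid(const char *name, const char *uuid)
	{
		struct dm_task *dmt;
		int r = 0;

		dmt = dm_task_create(DM_DEVICE_RENAME);
		if (!dmt)
			return 0;

		if (dm_task_set_name(dmt, name) &&
		    dm_task_set_newuuid(dmt, uuid))	/* assumed helper; sets DM_UUID_FLAG */
			r = dm_task_run(dmt);

		dm_task_destroy(dmt);
		return r;
	}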
@@ -885,7 +938,7 @@ static int do_resume(struct dm_ioctl *param) | |||
885 | 938 | ||
886 | hc = __find_device_hash_cell(param); | 939 | hc = __find_device_hash_cell(param); |
887 | if (!hc) { | 940 | if (!hc) { |
888 | DMWARN("device doesn't appear to be in the dev hash table."); | 941 | DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); |
889 | up_write(&_hash_lock); | 942 | up_write(&_hash_lock); |
890 | return -ENXIO; | 943 | return -ENXIO; |
891 | } | 944 | } |
@@ -1212,7 +1265,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) | |||
1212 | 1265 | ||
1213 | hc = __find_device_hash_cell(param); | 1266 | hc = __find_device_hash_cell(param); |
1214 | if (!hc) { | 1267 | if (!hc) { |
1215 | DMWARN("device doesn't appear to be in the dev hash table."); | 1268 | DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); |
1216 | up_write(&_hash_lock); | 1269 | up_write(&_hash_lock); |
1217 | return -ENXIO; | 1270 | return -ENXIO; |
1218 | } | 1271 | } |
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index d8587bac5682..924f5f0084c2 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c | |||
@@ -37,6 +37,13 @@ struct dm_kcopyd_client { | |||
37 | unsigned int nr_pages; | 37 | unsigned int nr_pages; |
38 | unsigned int nr_free_pages; | 38 | unsigned int nr_free_pages; |
39 | 39 | ||
40 | /* | ||
41 | * Block devices to unplug. | ||
42 | * Non-NULL pointer means that a block device has some pending requests | ||
43 | * and needs to be unplugged. | ||
44 | */ | ||
45 | struct block_device *unplug[2]; | ||
46 | |||
40 | struct dm_io_client *io_client; | 47 | struct dm_io_client *io_client; |
41 | 48 | ||
42 | wait_queue_head_t destroyq; | 49 | wait_queue_head_t destroyq; |
@@ -308,6 +315,31 @@ static int run_complete_job(struct kcopyd_job *job) | |||
308 | return 0; | 315 | return 0; |
309 | } | 316 | } |
310 | 317 | ||
318 | /* | ||
319 | * Unplug the block device at the specified index. | ||
320 | */ | ||
321 | static void unplug(struct dm_kcopyd_client *kc, int rw) | ||
322 | { | ||
323 | if (kc->unplug[rw] != NULL) { | ||
324 | blk_unplug(bdev_get_queue(kc->unplug[rw])); | ||
325 | kc->unplug[rw] = NULL; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * Prepare block device unplug. If there's another device | ||
331 | * to be unplugged at the same array index, we unplug that | ||
332 | * device first. | ||
333 | */ | ||
334 | static void prepare_unplug(struct dm_kcopyd_client *kc, int rw, | ||
335 | struct block_device *bdev) | ||
336 | { | ||
337 | if (likely(kc->unplug[rw] == bdev)) | ||
338 | return; | ||
339 | unplug(kc, rw); | ||
340 | kc->unplug[rw] = bdev; | ||
341 | } | ||
342 | |||
311 | static void complete_io(unsigned long error, void *context) | 343 | static void complete_io(unsigned long error, void *context) |
312 | { | 344 | { |
313 | struct kcopyd_job *job = (struct kcopyd_job *) context; | 345 | struct kcopyd_job *job = (struct kcopyd_job *) context; |
@@ -345,7 +377,7 @@ static int run_io_job(struct kcopyd_job *job) | |||
345 | { | 377 | { |
346 | int r; | 378 | int r; |
347 | struct dm_io_request io_req = { | 379 | struct dm_io_request io_req = { |
348 | .bi_rw = job->rw | REQ_SYNC | REQ_UNPLUG, | 380 | .bi_rw = job->rw, |
349 | .mem.type = DM_IO_PAGE_LIST, | 381 | .mem.type = DM_IO_PAGE_LIST, |
350 | .mem.ptr.pl = job->pages, | 382 | .mem.ptr.pl = job->pages, |
351 | .mem.offset = job->offset, | 383 | .mem.offset = job->offset, |
@@ -354,10 +386,16 @@ static int run_io_job(struct kcopyd_job *job) | |||
354 | .client = job->kc->io_client, | 386 | .client = job->kc->io_client, |
355 | }; | 387 | }; |
356 | 388 | ||
357 | if (job->rw == READ) | 389 | if (job->rw == READ) { |
358 | r = dm_io(&io_req, 1, &job->source, NULL); | 390 | r = dm_io(&io_req, 1, &job->source, NULL); |
359 | else | 391 | prepare_unplug(job->kc, READ, job->source.bdev); |
392 | } else { | ||
393 | if (job->num_dests > 1) | ||
394 | io_req.bi_rw |= REQ_UNPLUG; | ||
360 | r = dm_io(&io_req, job->num_dests, job->dests, NULL); | 395 | r = dm_io(&io_req, job->num_dests, job->dests, NULL); |
396 | if (!(io_req.bi_rw & REQ_UNPLUG)) | ||
397 | prepare_unplug(job->kc, WRITE, job->dests[0].bdev); | ||
398 | } | ||
361 | 399 | ||
362 | return r; | 400 | return r; |
363 | } | 401 | } |
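[Note] run_io_job() drops the blanket REQ_SYNC | REQ_UNPLUG: only writes fanning out to more than one destination are tagged REQ_UNPLUG, while single-destination I/O is left to the batched prepare_unplug()/unplug() machinery added above. For context, a minimal asynchronous dm-io read using the same request structure; the function, client and callback names are placeholders, the fields mirror those in the hunk plus the standard .notify pair:

	static int example_read(struct dm_io_client *client,
				struct dm_io_region *where,
				struct page_list *pl,
				io_notify_fn done, void *context)
	{
		struct dm_io_request io_req = {
			.bi_rw = READ,
			.mem.type = DM_IO_PAGE_LIST,
			.mem.ptr.pl = pl,
			.mem.offset = 0,
			.notify.fn = done,		/* completion callback */
			.notify.context = context,
			.client = client,
		};

		return dm_io(&io_req, 1, where, NULL);
	}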
@@ -435,10 +473,18 @@ static void do_work(struct work_struct *work) | |||
435 | * Pages jobs when successful will jump onto the io jobs | 473 | * Pages jobs when successful will jump onto the io jobs |
436 | * list. io jobs call wake when they complete and it all | 474 | * list. io jobs call wake when they complete and it all |
437 | * starts again. | 475 | * starts again. |
476 | * | ||
477 | * Note that io_jobs add block devices to the unplug array, | ||
478 | * this array is cleared with "unplug" calls. It is thus | ||
479 | * forbidden to run complete_jobs after io_jobs and before | ||
480 | * unplug because the block device could be destroyed in | ||
481 | * job completion callback. | ||
438 | */ | 482 | */ |
439 | process_jobs(&kc->complete_jobs, kc, run_complete_job); | 483 | process_jobs(&kc->complete_jobs, kc, run_complete_job); |
440 | process_jobs(&kc->pages_jobs, kc, run_pages_job); | 484 | process_jobs(&kc->pages_jobs, kc, run_pages_job); |
441 | process_jobs(&kc->io_jobs, kc, run_io_job); | 485 | process_jobs(&kc->io_jobs, kc, run_io_job); |
486 | unplug(kc, READ); | ||
487 | unplug(kc, WRITE); | ||
442 | } | 488 | } |
443 | 489 | ||
444 | /* | 490 | /* |
@@ -619,12 +665,15 @@ int dm_kcopyd_client_create(unsigned int nr_pages, | |||
619 | INIT_LIST_HEAD(&kc->io_jobs); | 665 | INIT_LIST_HEAD(&kc->io_jobs); |
620 | INIT_LIST_HEAD(&kc->pages_jobs); | 666 | INIT_LIST_HEAD(&kc->pages_jobs); |
621 | 667 | ||
668 | memset(kc->unplug, 0, sizeof(kc->unplug)); | ||
669 | |||
622 | kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); | 670 | kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); |
623 | if (!kc->job_pool) | 671 | if (!kc->job_pool) |
624 | goto bad_slab; | 672 | goto bad_slab; |
625 | 673 | ||
626 | INIT_WORK(&kc->kcopyd_work, do_work); | 674 | INIT_WORK(&kc->kcopyd_work, do_work); |
627 | kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); | 675 | kc->kcopyd_wq = alloc_workqueue("kcopyd", |
676 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); | ||
628 | if (!kc->kcopyd_wq) | 677 | if (!kc->kcopyd_wq) |
629 | goto bad_workqueue; | 678 | goto bad_workqueue; |
630 | 679 | ||
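[Note] Besides zeroing the new unplug array, client creation moves from a single-threaded workqueue to a non-reentrant, memory-reclaim-capable one. For readers who have not used kcopyd, a hedged sketch of client setup and a one-destination copy, following the dm-kcopyd interface of this kernel generation (the callback, region arguments and the 32-page buffer size are illustrative choices, not values from the patch):

	static void copy_done(int read_err, unsigned long write_err, void *context)
	{
		/* placeholder completion handler */
		if (read_err || write_err)
			DMERR("kcopyd copy failed");
	}

	static int example_copy(struct dm_io_region *from, struct dm_io_region *to)
	{
		struct dm_kcopyd_client *kc;
		int r;

		r = dm_kcopyd_client_create(32, &kc);	/* 32 pages: arbitrary example */
		if (r)
			return r;

		r = dm_kcopyd_copy(kc, from, 1, to, 0, copy_done, NULL);
		if (r)
			dm_kcopyd_client_destroy(kc);
		/* on success the client is kept and destroyed at target dtr time */

		return r;
	}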
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 1ed0094f064b..aa2e0c374ab3 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c | |||
@@ -12,12 +12,22 @@ | |||
12 | 12 | ||
13 | #include "dm-log-userspace-transfer.h" | 13 | #include "dm-log-userspace-transfer.h" |
14 | 14 | ||
15 | #define DM_LOG_USERSPACE_VSN "1.1.0" | ||
16 | |||
15 | struct flush_entry { | 17 | struct flush_entry { |
16 | int type; | 18 | int type; |
17 | region_t region; | 19 | region_t region; |
18 | struct list_head list; | 20 | struct list_head list; |
19 | }; | 21 | }; |
20 | 22 | ||
23 | /* | ||
24 | * This limit on the number of mark and clear request is, to a degree, | ||
25 | * arbitrary. However, there is some basis for the choice in the limits | ||
26 | * imposed on the size of data payload by dm-log-userspace-transfer.c: | ||
27 | * dm_consult_userspace(). | ||
28 | */ | ||
29 | #define MAX_FLUSH_GROUP_COUNT 32 | ||
30 | |||
21 | struct log_c { | 31 | struct log_c { |
22 | struct dm_target *ti; | 32 | struct dm_target *ti; |
23 | uint32_t region_size; | 33 | uint32_t region_size; |
@@ -37,8 +47,15 @@ struct log_c { | |||
37 | */ | 47 | */ |
38 | uint64_t in_sync_hint; | 48 | uint64_t in_sync_hint; |
39 | 49 | ||
50 | /* | ||
51 | * Mark and clear requests are held until a flush is issued | ||
52 | * so that we can group, and thereby limit, the amount of | ||
53 | * network traffic between kernel and userspace. The 'flush_lock' | ||
54 | * is used to protect these lists. | ||
55 | */ | ||
40 | spinlock_t flush_lock; | 56 | spinlock_t flush_lock; |
41 | struct list_head flush_list; /* only for clear and mark requests */ | 57 | struct list_head mark_list; |
58 | struct list_head clear_list; | ||
42 | }; | 59 | }; |
43 | 60 | ||
44 | static mempool_t *flush_entry_pool; | 61 | static mempool_t *flush_entry_pool; |
@@ -169,7 +186,8 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | |||
169 | 186 | ||
170 | strncpy(lc->uuid, argv[0], DM_UUID_LEN); | 187 | strncpy(lc->uuid, argv[0], DM_UUID_LEN); |
171 | spin_lock_init(&lc->flush_lock); | 188 | spin_lock_init(&lc->flush_lock); |
172 | INIT_LIST_HEAD(&lc->flush_list); | 189 | INIT_LIST_HEAD(&lc->mark_list); |
190 | INIT_LIST_HEAD(&lc->clear_list); | ||
173 | 191 | ||
174 | str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); | 192 | str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); |
175 | if (str_size < 0) { | 193 | if (str_size < 0) { |
@@ -181,8 +199,11 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | |||
181 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, | 199 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, |
182 | ctr_str, str_size, NULL, NULL); | 200 | ctr_str, str_size, NULL, NULL); |
183 | 201 | ||
184 | if (r == -ESRCH) { | 202 | if (r < 0) { |
185 | DMERR("Userspace log server not found"); | 203 | if (r == -ESRCH) |
204 | DMERR("Userspace log server not found"); | ||
205 | else | ||
206 | DMERR("Userspace log server failed to create log"); | ||
186 | goto out; | 207 | goto out; |
187 | } | 208 | } |
188 | 209 | ||
@@ -214,10 +235,9 @@ out: | |||
214 | 235 | ||
215 | static void userspace_dtr(struct dm_dirty_log *log) | 236 | static void userspace_dtr(struct dm_dirty_log *log) |
216 | { | 237 | { |
217 | int r; | ||
218 | struct log_c *lc = log->context; | 238 | struct log_c *lc = log->context; |
219 | 239 | ||
220 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, | 240 | (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, |
221 | NULL, 0, | 241 | NULL, 0, |
222 | NULL, NULL); | 242 | NULL, NULL); |
223 | 243 | ||
@@ -338,6 +358,71 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region, | |||
338 | return (r) ? 0 : (int)in_sync; | 358 | return (r) ? 0 : (int)in_sync; |
339 | } | 359 | } |
340 | 360 | ||
361 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) | ||
362 | { | ||
363 | int r = 0; | ||
364 | struct flush_entry *fe; | ||
365 | |||
366 | list_for_each_entry(fe, flush_list, list) { | ||
367 | r = userspace_do_request(lc, lc->uuid, fe->type, | ||
368 | (char *)&fe->region, | ||
369 | sizeof(fe->region), | ||
370 | NULL, NULL); | ||
371 | if (r) | ||
372 | break; | ||
373 | } | ||
374 | |||
375 | return r; | ||
376 | } | ||
377 | |||
378 | static int flush_by_group(struct log_c *lc, struct list_head *flush_list) | ||
379 | { | ||
380 | int r = 0; | ||
381 | int count; | ||
382 | uint32_t type = 0; | ||
383 | struct flush_entry *fe, *tmp_fe; | ||
384 | LIST_HEAD(tmp_list); | ||
385 | uint64_t group[MAX_FLUSH_GROUP_COUNT]; | ||
386 | |||
387 | /* | ||
388 | * Group process the requests | ||
389 | */ | ||
390 | while (!list_empty(flush_list)) { | ||
391 | count = 0; | ||
392 | |||
393 | list_for_each_entry_safe(fe, tmp_fe, flush_list, list) { | ||
394 | group[count] = fe->region; | ||
395 | count++; | ||
396 | |||
397 | list_del(&fe->list); | ||
398 | list_add(&fe->list, &tmp_list); | ||
399 | |||
400 | type = fe->type; | ||
401 | if (count >= MAX_FLUSH_GROUP_COUNT) | ||
402 | break; | ||
403 | } | ||
404 | |||
405 | r = userspace_do_request(lc, lc->uuid, type, | ||
406 | (char *)(group), | ||
407 | count * sizeof(uint64_t), | ||
408 | NULL, NULL); | ||
409 | if (r) { | ||
410 | /* Group send failed. Attempt one-by-one. */ | ||
411 | list_splice_init(&tmp_list, flush_list); | ||
412 | r = flush_one_by_one(lc, flush_list); | ||
413 | break; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Must collect flush_entrys that were successfully processed | ||
419 | * as a group so that they will be free'd by the caller. | ||
420 | */ | ||
421 | list_splice_init(&tmp_list, flush_list); | ||
422 | |||
423 | return r; | ||
424 | } | ||
425 | |||
341 | /* | 426 | /* |
342 | * userspace_flush | 427 | * userspace_flush |
343 | * | 428 | * |
@@ -360,31 +445,25 @@ static int userspace_flush(struct dm_dirty_log *log) | |||
360 | int r = 0; | 445 | int r = 0; |
361 | unsigned long flags; | 446 | unsigned long flags; |
362 | struct log_c *lc = log->context; | 447 | struct log_c *lc = log->context; |
363 | LIST_HEAD(flush_list); | 448 | LIST_HEAD(mark_list); |
449 | LIST_HEAD(clear_list); | ||
364 | struct flush_entry *fe, *tmp_fe; | 450 | struct flush_entry *fe, *tmp_fe; |
365 | 451 | ||
366 | spin_lock_irqsave(&lc->flush_lock, flags); | 452 | spin_lock_irqsave(&lc->flush_lock, flags); |
367 | list_splice_init(&lc->flush_list, &flush_list); | 453 | list_splice_init(&lc->mark_list, &mark_list); |
454 | list_splice_init(&lc->clear_list, &clear_list); | ||
368 | spin_unlock_irqrestore(&lc->flush_lock, flags); | 455 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
369 | 456 | ||
370 | if (list_empty(&flush_list)) | 457 | if (list_empty(&mark_list) && list_empty(&clear_list)) |
371 | return 0; | 458 | return 0; |
372 | 459 | ||
373 | /* | 460 | r = flush_by_group(lc, &mark_list); |
374 | * FIXME: Count up requests, group request types, | 461 | if (r) |
375 | * allocate memory to stick all requests in and | 462 | goto fail; |
376 | * send to server in one go. Failing the allocation, | ||
377 | * do it one by one. | ||
378 | */ | ||
379 | 463 | ||
380 | list_for_each_entry(fe, &flush_list, list) { | 464 | r = flush_by_group(lc, &clear_list); |
381 | r = userspace_do_request(lc, lc->uuid, fe->type, | 465 | if (r) |
382 | (char *)&fe->region, | 466 | goto fail; |
383 | sizeof(fe->region), | ||
384 | NULL, NULL); | ||
385 | if (r) | ||
386 | goto fail; | ||
387 | } | ||
388 | 467 | ||
389 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, | 468 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, |
390 | NULL, 0, NULL, NULL); | 469 | NULL, 0, NULL, NULL); |
@@ -395,7 +474,11 @@ fail: | |||
395 | * Calling code will receive an error and will know that | 474 | * Calling code will receive an error and will know that |
396 | * the log facility has failed. | 475 | * the log facility has failed. |
397 | */ | 476 | */ |
398 | list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { | 477 | list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) { |
478 | list_del(&fe->list); | ||
479 | mempool_free(fe, flush_entry_pool); | ||
480 | } | ||
481 | list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) { | ||
399 | list_del(&fe->list); | 482 | list_del(&fe->list); |
400 | mempool_free(fe, flush_entry_pool); | 483 | mempool_free(fe, flush_entry_pool); |
401 | } | 484 | } |
@@ -425,7 +508,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region) | |||
425 | spin_lock_irqsave(&lc->flush_lock, flags); | 508 | spin_lock_irqsave(&lc->flush_lock, flags); |
426 | fe->type = DM_ULOG_MARK_REGION; | 509 | fe->type = DM_ULOG_MARK_REGION; |
427 | fe->region = region; | 510 | fe->region = region; |
428 | list_add(&fe->list, &lc->flush_list); | 511 | list_add(&fe->list, &lc->mark_list); |
429 | spin_unlock_irqrestore(&lc->flush_lock, flags); | 512 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
430 | 513 | ||
431 | return; | 514 | return; |
@@ -462,7 +545,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | |||
462 | spin_lock_irqsave(&lc->flush_lock, flags); | 545 | spin_lock_irqsave(&lc->flush_lock, flags); |
463 | fe->type = DM_ULOG_CLEAR_REGION; | 546 | fe->type = DM_ULOG_CLEAR_REGION; |
464 | fe->region = region; | 547 | fe->region = region; |
465 | list_add(&fe->list, &lc->flush_list); | 548 | list_add(&fe->list, &lc->clear_list); |
466 | spin_unlock_irqrestore(&lc->flush_lock, flags); | 549 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
467 | 550 | ||
468 | return; | 551 | return; |
@@ -684,7 +767,7 @@ static int __init userspace_dirty_log_init(void) | |||
684 | return r; | 767 | return r; |
685 | } | 768 | } |
686 | 769 | ||
687 | DMINFO("version 1.0.0 loaded"); | 770 | DMINFO("version " DM_LOG_USERSPACE_VSN " loaded"); |
688 | return 0; | 771 | return 0; |
689 | } | 772 | } |
690 | 773 | ||
@@ -694,7 +777,7 @@ static void __exit userspace_dirty_log_exit(void) | |||
694 | dm_ulog_tfr_exit(); | 777 | dm_ulog_tfr_exit(); |
695 | mempool_destroy(flush_entry_pool); | 778 | mempool_destroy(flush_entry_pool); |
696 | 779 | ||
697 | DMINFO("version 1.0.0 unloaded"); | 780 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded"); |
698 | return; | 781 | return; |
699 | } | 782 | } |
700 | 783 | ||
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 075cbcf8a9f5..049eaf12aaab 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c | |||
@@ -198,6 +198,7 @@ resend: | |||
198 | 198 | ||
199 | memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg)); | 199 | memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg)); |
200 | memcpy(tfr->uuid, uuid, DM_UUID_LEN); | 200 | memcpy(tfr->uuid, uuid, DM_UUID_LEN); |
201 | tfr->version = DM_ULOG_REQUEST_VERSION; | ||
201 | tfr->luid = luid; | 202 | tfr->luid = luid; |
202 | tfr->seq = dm_ulog_seq++; | 203 | tfr->seq = dm_ulog_seq++; |
203 | 204 | ||
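[Note] Every request is now stamped with DM_ULOG_REQUEST_VERSION so the userspace log server can reject protocol revisions it does not understand, and the grouped flush above packs up to MAX_FLUSH_GROUP_COUNT region numbers into one payload of count * sizeof(uint64_t) bytes. A hedged sketch of how a userspace server might unpack such a request; only uuid, luid, seq and version appear in the hunks above, so data_size, data and request_type are assumed field names of struct dm_ulog_request, and mark_or_clear_region() is a placeholder:

	#include <stdint.h>
	#include <linux/dm-log-userspace.h>

	static void handle_grouped_regions(struct dm_ulog_request *rq)
	{
		uint64_t *region = (uint64_t *) rq->data;	/* assumed field */
		unsigned i, count = rq->data_size / sizeof(uint64_t);

		if (rq->version != DM_ULOG_REQUEST_VERSION)
			return;		/* unknown protocol revision */

		for (i = 0; i < count; i++)
			mark_or_clear_region(rq->request_type, region[i]);
	}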
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 33420e68d153..6951536ea29c 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c | |||
@@ -455,7 +455,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, | |||
455 | r = PTR_ERR(lc->io_req.client); | 455 | r = PTR_ERR(lc->io_req.client); |
456 | DMWARN("couldn't allocate disk io client"); | 456 | DMWARN("couldn't allocate disk io client"); |
457 | kfree(lc); | 457 | kfree(lc); |
458 | return -ENOMEM; | 458 | return r; |
459 | } | 459 | } |
460 | 460 | ||
461 | lc->disk_header = vmalloc(buf_size); | 461 | lc->disk_header = vmalloc(buf_size); |
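[Note] The one-liner above forwards the error encoded in the ERR_PTR returned by the dm-io client allocation instead of collapsing every failure to -ENOMEM. The same pattern applies anywhere an ERR_PTR-returning allocator is consumed; a small sketch, with log_ctx as a hypothetical container:

	static int example_create_io_client(struct log_ctx *ctx, unsigned pages)
	{
		ctx->io_client = dm_io_client_create(pages);	/* ERR_PTR() on failure */
		if (IS_ERR(ctx->io_client))
			return PTR_ERR(ctx->io_client);		/* forward the real errno */

		return 0;
	}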
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 487ecda90ad4..b82d28819e2a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | #define DM_MSG_PREFIX "multipath" | 24 | #define DM_MSG_PREFIX "multipath" |
25 | #define MESG_STR(x) x, sizeof(x) | 25 | #define MESG_STR(x) x, sizeof(x) |
26 | #define DM_PG_INIT_DELAY_MSECS 2000 | ||
27 | #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) | ||
26 | 28 | ||
27 | /* Path properties */ | 29 | /* Path properties */ |
28 | struct pgpath { | 30 | struct pgpath { |
@@ -33,8 +35,7 @@ struct pgpath { | |||
33 | unsigned fail_count; /* Cumulative failure count */ | 35 | unsigned fail_count; /* Cumulative failure count */ |
34 | 36 | ||
35 | struct dm_path path; | 37 | struct dm_path path; |
36 | struct work_struct deactivate_path; | 38 | struct delayed_work activate_path; |
37 | struct work_struct activate_path; | ||
38 | }; | 39 | }; |
39 | 40 | ||
40 | #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) | 41 | #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) |
@@ -65,11 +66,15 @@ struct multipath { | |||
65 | 66 | ||
66 | const char *hw_handler_name; | 67 | const char *hw_handler_name; |
67 | char *hw_handler_params; | 68 | char *hw_handler_params; |
69 | |||
68 | unsigned nr_priority_groups; | 70 | unsigned nr_priority_groups; |
69 | struct list_head priority_groups; | 71 | struct list_head priority_groups; |
72 | |||
73 | wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ | ||
74 | |||
70 | unsigned pg_init_required; /* pg_init needs calling? */ | 75 | unsigned pg_init_required; /* pg_init needs calling? */ |
71 | unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ | 76 | unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ |
72 | wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ | 77 | unsigned pg_init_delay_retry; /* Delay pg_init retry? */ |
73 | 78 | ||
74 | unsigned nr_valid_paths; /* Total number of usable paths */ | 79 | unsigned nr_valid_paths; /* Total number of usable paths */ |
75 | struct pgpath *current_pgpath; | 80 | struct pgpath *current_pgpath; |
@@ -82,6 +87,7 @@ struct multipath { | |||
82 | unsigned saved_queue_if_no_path;/* Saved state during suspension */ | 87 | unsigned saved_queue_if_no_path;/* Saved state during suspension */ |
83 | unsigned pg_init_retries; /* Number of times to retry pg_init */ | 88 | unsigned pg_init_retries; /* Number of times to retry pg_init */ |
84 | unsigned pg_init_count; /* Number of times pg_init called */ | 89 | unsigned pg_init_count; /* Number of times pg_init called */ |
90 | unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ | ||
85 | 91 | ||
86 | struct work_struct process_queued_ios; | 92 | struct work_struct process_queued_ios; |
87 | struct list_head queued_ios; | 93 | struct list_head queued_ios; |
@@ -116,7 +122,6 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; | |||
116 | static void process_queued_ios(struct work_struct *work); | 122 | static void process_queued_ios(struct work_struct *work); |
117 | static void trigger_event(struct work_struct *work); | 123 | static void trigger_event(struct work_struct *work); |
118 | static void activate_path(struct work_struct *work); | 124 | static void activate_path(struct work_struct *work); |
119 | static void deactivate_path(struct work_struct *work); | ||
120 | 125 | ||
121 | 126 | ||
122 | /*----------------------------------------------- | 127 | /*----------------------------------------------- |
@@ -129,8 +134,7 @@ static struct pgpath *alloc_pgpath(void) | |||
129 | 134 | ||
130 | if (pgpath) { | 135 | if (pgpath) { |
131 | pgpath->is_active = 1; | 136 | pgpath->is_active = 1; |
132 | INIT_WORK(&pgpath->deactivate_path, deactivate_path); | 137 | INIT_DELAYED_WORK(&pgpath->activate_path, activate_path); |
133 | INIT_WORK(&pgpath->activate_path, activate_path); | ||
134 | } | 138 | } |
135 | 139 | ||
136 | return pgpath; | 140 | return pgpath; |
@@ -141,14 +145,6 @@ static void free_pgpath(struct pgpath *pgpath) | |||
141 | kfree(pgpath); | 145 | kfree(pgpath); |
142 | } | 146 | } |
143 | 147 | ||
144 | static void deactivate_path(struct work_struct *work) | ||
145 | { | ||
146 | struct pgpath *pgpath = | ||
147 | container_of(work, struct pgpath, deactivate_path); | ||
148 | |||
149 | blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); | ||
150 | } | ||
151 | |||
152 | static struct priority_group *alloc_priority_group(void) | 148 | static struct priority_group *alloc_priority_group(void) |
153 | { | 149 | { |
154 | struct priority_group *pg; | 150 | struct priority_group *pg; |
@@ -199,6 +195,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) | |||
199 | INIT_LIST_HEAD(&m->queued_ios); | 195 | INIT_LIST_HEAD(&m->queued_ios); |
200 | spin_lock_init(&m->lock); | 196 | spin_lock_init(&m->lock); |
201 | m->queue_io = 1; | 197 | m->queue_io = 1; |
198 | m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; | ||
202 | INIT_WORK(&m->process_queued_ios, process_queued_ios); | 199 | INIT_WORK(&m->process_queued_ios, process_queued_ios); |
203 | INIT_WORK(&m->trigger_event, trigger_event); | 200 | INIT_WORK(&m->trigger_event, trigger_event); |
204 | init_waitqueue_head(&m->pg_init_wait); | 201 | init_waitqueue_head(&m->pg_init_wait); |
@@ -238,14 +235,19 @@ static void free_multipath(struct multipath *m) | |||
238 | static void __pg_init_all_paths(struct multipath *m) | 235 | static void __pg_init_all_paths(struct multipath *m) |
239 | { | 236 | { |
240 | struct pgpath *pgpath; | 237 | struct pgpath *pgpath; |
238 | unsigned long pg_init_delay = 0; | ||
241 | 239 | ||
242 | m->pg_init_count++; | 240 | m->pg_init_count++; |
243 | m->pg_init_required = 0; | 241 | m->pg_init_required = 0; |
242 | if (m->pg_init_delay_retry) | ||
243 | pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? | ||
244 | m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); | ||
244 | list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { | 245 | list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { |
245 | /* Skip failed paths */ | 246 | /* Skip failed paths */ |
246 | if (!pgpath->is_active) | 247 | if (!pgpath->is_active) |
247 | continue; | 248 | continue; |
248 | if (queue_work(kmpath_handlerd, &pgpath->activate_path)) | 249 | if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path, |
250 | pg_init_delay)) | ||
249 | m->pg_init_in_progress++; | 251 | m->pg_init_in_progress++; |
250 | } | 252 | } |
251 | } | 253 | } |
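[Note] activate_path becomes a delayed_work so that a retryable pg_init (see the SCSI_DH_RETRY case further down) can be re-queued after pg_init_delay_msecs instead of hammering the hardware handler immediately; a zero delay keeps the old behaviour. A minimal sketch of the delayed-work pattern used here, with all "example" names as placeholders:

	struct example_path {
		struct delayed_work activate_path;
	};

	static void example_activate(struct work_struct *work)
	{
		struct example_path *p =
			container_of(work, struct example_path, activate_path.work);

		/* ... perform the (possibly slow) activation for p ... */
	}

	static void example_init_path(struct example_path *p)
	{
		INIT_DELAYED_WORK(&p->activate_path, example_activate);
	}

	static void example_schedule(struct workqueue_struct *wq,
				     struct example_path *p, unsigned delay_msecs)
	{
		/* a delay of 0 behaves like plain queue_work() */
		queue_delayed_work(wq, &p->activate_path,
				   msecs_to_jiffies(delay_msecs));
	}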
@@ -793,8 +795,9 @@ static int parse_features(struct arg_set *as, struct multipath *m) | |||
793 | const char *param_name; | 795 | const char *param_name; |
794 | 796 | ||
795 | static struct param _params[] = { | 797 | static struct param _params[] = { |
796 | {0, 3, "invalid number of feature args"}, | 798 | {0, 5, "invalid number of feature args"}, |
797 | {1, 50, "pg_init_retries must be between 1 and 50"}, | 799 | {1, 50, "pg_init_retries must be between 1 and 50"}, |
800 | {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, | ||
798 | }; | 801 | }; |
799 | 802 | ||
800 | r = read_param(_params, shift(as), &argc, &ti->error); | 803 | r = read_param(_params, shift(as), &argc, &ti->error); |
@@ -821,6 +824,14 @@ static int parse_features(struct arg_set *as, struct multipath *m) | |||
821 | continue; | 824 | continue; |
822 | } | 825 | } |
823 | 826 | ||
827 | if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && | ||
828 | (argc >= 1)) { | ||
829 | r = read_param(_params + 2, shift(as), | ||
830 | &m->pg_init_delay_msecs, &ti->error); | ||
831 | argc--; | ||
832 | continue; | ||
833 | } | ||
834 | |||
824 | ti->error = "Unrecognised multipath feature request"; | 835 | ti->error = "Unrecognised multipath feature request"; |
825 | r = -EINVAL; | 836 | r = -EINVAL; |
826 | } while (argc && !r); | 837 | } while (argc && !r); |
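[Note] With the widened limit above, the multipath feature block may carry up to five feature arguments, and pg_init_delay_msecs accepts 0..60000. As a worked example (purely illustrative), a feature section of

	5 queue_if_no_path pg_init_retries 3 pg_init_delay_msecs 2000

requests queueing on path loss, three pg_init retries and a two-second delay between hardware-handler retries; the leading 5 is the argument count, which the status code below reproduces as queue_if_no_path (1) + pg_init_retries pair (2) + pg_init_delay_msecs pair (2).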
@@ -931,7 +942,7 @@ static void flush_multipath_work(struct multipath *m) | |||
931 | flush_workqueue(kmpath_handlerd); | 942 | flush_workqueue(kmpath_handlerd); |
932 | multipath_wait_for_pg_init_completion(m); | 943 | multipath_wait_for_pg_init_completion(m); |
933 | flush_workqueue(kmultipathd); | 944 | flush_workqueue(kmultipathd); |
934 | flush_scheduled_work(); | 945 | flush_work_sync(&m->trigger_event); |
935 | } | 946 | } |
936 | 947 | ||
937 | static void multipath_dtr(struct dm_target *ti) | 948 | static void multipath_dtr(struct dm_target *ti) |
@@ -995,7 +1006,6 @@ static int fail_path(struct pgpath *pgpath) | |||
995 | pgpath->path.dev->name, m->nr_valid_paths); | 1006 | pgpath->path.dev->name, m->nr_valid_paths); |
996 | 1007 | ||
997 | schedule_work(&m->trigger_event); | 1008 | schedule_work(&m->trigger_event); |
998 | queue_work(kmultipathd, &pgpath->deactivate_path); | ||
999 | 1009 | ||
1000 | out: | 1010 | out: |
1001 | spin_unlock_irqrestore(&m->lock, flags); | 1011 | spin_unlock_irqrestore(&m->lock, flags); |
@@ -1034,7 +1044,7 @@ static int reinstate_path(struct pgpath *pgpath) | |||
1034 | m->current_pgpath = NULL; | 1044 | m->current_pgpath = NULL; |
1035 | queue_work(kmultipathd, &m->process_queued_ios); | 1045 | queue_work(kmultipathd, &m->process_queued_ios); |
1036 | } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { | 1046 | } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { |
1037 | if (queue_work(kmpath_handlerd, &pgpath->activate_path)) | 1047 | if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) |
1038 | m->pg_init_in_progress++; | 1048 | m->pg_init_in_progress++; |
1039 | } | 1049 | } |
1040 | 1050 | ||
@@ -1169,6 +1179,7 @@ static void pg_init_done(void *data, int errors) | |||
1169 | struct priority_group *pg = pgpath->pg; | 1179 | struct priority_group *pg = pgpath->pg; |
1170 | struct multipath *m = pg->m; | 1180 | struct multipath *m = pg->m; |
1171 | unsigned long flags; | 1181 | unsigned long flags; |
1182 | unsigned delay_retry = 0; | ||
1172 | 1183 | ||
1173 | /* device or driver problems */ | 1184 | /* device or driver problems */ |
1174 | switch (errors) { | 1185 | switch (errors) { |
@@ -1193,8 +1204,9 @@ static void pg_init_done(void *data, int errors) | |||
1193 | */ | 1204 | */ |
1194 | bypass_pg(m, pg, 1); | 1205 | bypass_pg(m, pg, 1); |
1195 | break; | 1206 | break; |
1196 | /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */ | ||
1197 | case SCSI_DH_RETRY: | 1207 | case SCSI_DH_RETRY: |
1208 | /* Wait before retrying. */ | ||
1209 | delay_retry = 1; | ||
1198 | case SCSI_DH_IMM_RETRY: | 1210 | case SCSI_DH_IMM_RETRY: |
1199 | case SCSI_DH_RES_TEMP_UNAVAIL: | 1211 | case SCSI_DH_RES_TEMP_UNAVAIL: |
1200 | if (pg_init_limit_reached(m, pgpath)) | 1212 | if (pg_init_limit_reached(m, pgpath)) |
@@ -1227,6 +1239,7 @@ static void pg_init_done(void *data, int errors) | |||
1227 | if (!m->pg_init_required) | 1239 | if (!m->pg_init_required) |
1228 | m->queue_io = 0; | 1240 | m->queue_io = 0; |
1229 | 1241 | ||
1242 | m->pg_init_delay_retry = delay_retry; | ||
1230 | queue_work(kmultipathd, &m->process_queued_ios); | 1243 | queue_work(kmultipathd, &m->process_queued_ios); |
1231 | 1244 | ||
1232 | /* | 1245 | /* |
@@ -1241,7 +1254,7 @@ out: | |||
1241 | static void activate_path(struct work_struct *work) | 1254 | static void activate_path(struct work_struct *work) |
1242 | { | 1255 | { |
1243 | struct pgpath *pgpath = | 1256 | struct pgpath *pgpath = |
1244 | container_of(work, struct pgpath, activate_path); | 1257 | container_of(work, struct pgpath, activate_path.work); |
1245 | 1258 | ||
1246 | scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), | 1259 | scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), |
1247 | pg_init_done, pgpath); | 1260 | pg_init_done, pgpath); |
@@ -1382,11 +1395,14 @@ static int multipath_status(struct dm_target *ti, status_type_t type, | |||
1382 | DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); | 1395 | DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); |
1383 | else { | 1396 | else { |
1384 | DMEMIT("%u ", m->queue_if_no_path + | 1397 | DMEMIT("%u ", m->queue_if_no_path + |
1385 | (m->pg_init_retries > 0) * 2); | 1398 | (m->pg_init_retries > 0) * 2 + |
1399 | (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); | ||
1386 | if (m->queue_if_no_path) | 1400 | if (m->queue_if_no_path) |
1387 | DMEMIT("queue_if_no_path "); | 1401 | DMEMIT("queue_if_no_path "); |
1388 | if (m->pg_init_retries) | 1402 | if (m->pg_init_retries) |
1389 | DMEMIT("pg_init_retries %u ", m->pg_init_retries); | 1403 | DMEMIT("pg_init_retries %u ", m->pg_init_retries); |
1404 | if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) | ||
1405 | DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); | ||
1390 | } | 1406 | } |
1391 | 1407 | ||
1392 | if (!m->hw_handler_name || type == STATUSTYPE_INFO) | 1408 | if (!m->hw_handler_name || type == STATUSTYPE_INFO) |
@@ -1655,7 +1671,7 @@ out: | |||
1655 | *---------------------------------------------------------------*/ | 1671 | *---------------------------------------------------------------*/ |
1656 | static struct target_type multipath_target = { | 1672 | static struct target_type multipath_target = { |
1657 | .name = "multipath", | 1673 | .name = "multipath", |
1658 | .version = {1, 1, 1}, | 1674 | .version = {1, 2, 0}, |
1659 | .module = THIS_MODULE, | 1675 | .module = THIS_MODULE, |
1660 | .ctr = multipath_ctr, | 1676 | .ctr = multipath_ctr, |
1661 | .dtr = multipath_dtr, | 1677 | .dtr = multipath_dtr, |
@@ -1687,7 +1703,7 @@ static int __init dm_multipath_init(void) | |||
1687 | return -EINVAL; | 1703 | return -EINVAL; |
1688 | } | 1704 | } |
1689 | 1705 | ||
1690 | kmultipathd = create_workqueue("kmpathd"); | 1706 | kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); |
1691 | if (!kmultipathd) { | 1707 | if (!kmultipathd) { |
1692 | DMERR("failed to create workqueue kmpathd"); | 1708 | DMERR("failed to create workqueue kmpathd"); |
1693 | dm_unregister_target(&multipath_target); | 1709 | dm_unregister_target(&multipath_target); |
@@ -1701,7 +1717,8 @@ static int __init dm_multipath_init(void) | |||
1701 | * old workqueue would also create a bottleneck in the | 1717 | * old workqueue would also create a bottleneck in the |
1702 | * path of the storage hardware device activation. | 1718 | * path of the storage hardware device activation. |
1703 | */ | 1719 | */ |
1704 | kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd"); | 1720 | kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd", |
1721 | WQ_MEM_RECLAIM); | ||
1705 | if (!kmpath_handlerd) { | 1722 | if (!kmpath_handlerd) { |
1706 | DMERR("failed to create workqueue kmpath_handlerd"); | 1723 | DMERR("failed to create workqueue kmpath_handlerd"); |
1707 | destroy_workqueue(kmultipathd); | 1724 | destroy_workqueue(kmultipathd); |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c new file mode 100644 index 000000000000..b9e1e15ef11c --- /dev/null +++ b/drivers/md/dm-raid.c | |||
@@ -0,0 +1,697 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010-2011 Neil Brown | ||
3 | * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include <linux/slab.h> | ||
9 | |||
10 | #include "md.h" | ||
11 | #include "raid5.h" | ||
12 | #include "dm.h" | ||
13 | #include "bitmap.h" | ||
14 | |||
15 | #define DM_MSG_PREFIX "raid" | ||
16 | |||
17 | /* | ||
18 | * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then | ||
19 | * make it so the flag doesn't set anything. | ||
20 | */ | ||
21 | #ifndef MD_SYNC_STATE_FORCED | ||
22 | #define MD_SYNC_STATE_FORCED 0 | ||
23 | #endif | ||
24 | |||
25 | struct raid_dev { | ||
26 | /* | ||
27 | * Two DM devices, one to hold metadata and one to hold the | ||
28 | * actual data/parity. The reason for this is to not confuse | ||
29 | * ti->len and give more flexibility in altering size and | ||
30 | * characteristics. | ||
31 | * | ||
32 | * While it is possible for this device to be associated | ||
33 | * with a different physical device than the data_dev, it | ||
34 | * is intended for it to be the same. | ||
35 | * |--------- Physical Device ---------| | ||
36 | * |- meta_dev -|------ data_dev ------| | ||
37 | */ | ||
38 | struct dm_dev *meta_dev; | ||
39 | struct dm_dev *data_dev; | ||
40 | struct mdk_rdev_s rdev; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Flags for rs->print_flags field. | ||
45 | */ | ||
46 | #define DMPF_DAEMON_SLEEP 0x1 | ||
47 | #define DMPF_MAX_WRITE_BEHIND 0x2 | ||
48 | #define DMPF_SYNC 0x4 | ||
49 | #define DMPF_NOSYNC 0x8 | ||
50 | #define DMPF_STRIPE_CACHE 0x10 | ||
51 | #define DMPF_MIN_RECOVERY_RATE 0x20 | ||
52 | #define DMPF_MAX_RECOVERY_RATE 0x40 | ||
53 | |||
54 | struct raid_set { | ||
55 | struct dm_target *ti; | ||
56 | |||
57 | uint64_t print_flags; | ||
58 | |||
59 | struct mddev_s md; | ||
60 | struct raid_type *raid_type; | ||
61 | struct dm_target_callbacks callbacks; | ||
62 | |||
63 | struct raid_dev dev[0]; | ||
64 | }; | ||
65 | |||
66 | /* Supported raid types and properties. */ | ||
67 | static struct raid_type { | ||
68 | const char *name; /* RAID algorithm. */ | ||
69 | const char *descr; /* Descriptor text for logging. */ | ||
70 | const unsigned parity_devs; /* # of parity devices. */ | ||
71 | const unsigned minimal_devs; /* minimal # of devices in set. */ | ||
72 | const unsigned level; /* RAID level. */ | ||
73 | const unsigned algorithm; /* RAID algorithm. */ | ||
74 | } raid_types[] = { | ||
75 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | ||
76 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | ||
77 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | ||
78 | {"raid5_ls", "RAID5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, | ||
79 | {"raid5_rs", "RAID5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, | ||
80 | {"raid6_zr", "RAID6 (zero restart)", 2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART}, | ||
81 | {"raid6_nr", "RAID6 (N restart)", 2, 4, 6, ALGORITHM_ROTATING_N_RESTART}, | ||
82 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | ||
83 | }; | ||
84 | |||
85 | static struct raid_type *get_raid_type(char *name) | ||
86 | { | ||
87 | int i; | ||
88 | |||
89 | for (i = 0; i < ARRAY_SIZE(raid_types); i++) | ||
90 | if (!strcmp(raid_types[i].name, name)) | ||
91 | return &raid_types[i]; | ||
92 | |||
93 | return NULL; | ||
94 | } | ||
95 | |||
96 | static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *raid_type, unsigned raid_devs) | ||
97 | { | ||
98 | unsigned i; | ||
99 | struct raid_set *rs; | ||
100 | sector_t sectors_per_dev; | ||
101 | |||
102 | if (raid_devs <= raid_type->parity_devs) { | ||
103 | ti->error = "Insufficient number of devices"; | ||
104 | return ERR_PTR(-EINVAL); | ||
105 | } | ||
106 | |||
107 | sectors_per_dev = ti->len; | ||
108 | if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { | ||
109 | ti->error = "Target length not divisible by number of data devices"; | ||
110 | return ERR_PTR(-EINVAL); | ||
111 | } | ||
112 | |||
113 | rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); | ||
114 | if (!rs) { | ||
115 | ti->error = "Cannot allocate raid context"; | ||
116 | return ERR_PTR(-ENOMEM); | ||
117 | } | ||
118 | |||
119 | mddev_init(&rs->md); | ||
120 | |||
121 | rs->ti = ti; | ||
122 | rs->raid_type = raid_type; | ||
123 | rs->md.raid_disks = raid_devs; | ||
124 | rs->md.level = raid_type->level; | ||
125 | rs->md.new_level = rs->md.level; | ||
126 | rs->md.dev_sectors = sectors_per_dev; | ||
127 | rs->md.layout = raid_type->algorithm; | ||
128 | rs->md.new_layout = rs->md.layout; | ||
129 | rs->md.delta_disks = 0; | ||
130 | rs->md.recovery_cp = 0; | ||
131 | |||
132 | for (i = 0; i < raid_devs; i++) | ||
133 | md_rdev_init(&rs->dev[i].rdev); | ||
134 | |||
135 | /* | ||
136 | * Remaining items to be initialized by further RAID params: | ||
137 | * rs->md.persistent | ||
138 | * rs->md.external | ||
139 | * rs->md.chunk_sectors | ||
140 | * rs->md.new_chunk_sectors | ||
141 | */ | ||
142 | |||
143 | return rs; | ||
144 | } | ||
145 | |||
146 | static void context_free(struct raid_set *rs) | ||
147 | { | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < rs->md.raid_disks; i++) | ||
151 | if (rs->dev[i].data_dev) | ||
152 | dm_put_device(rs->ti, rs->dev[i].data_dev); | ||
153 | |||
154 | kfree(rs); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * For every device we have two words | ||
159 | * <meta_dev>: meta device name or '-' if missing | ||
160 | * <data_dev>: data device name or '-' if missing | ||
161 | * | ||
162 | * This code parses those words. | ||
163 | */ | ||
164 | static int dev_parms(struct raid_set *rs, char **argv) | ||
165 | { | ||
166 | int i; | ||
167 | int rebuild = 0; | ||
168 | int metadata_available = 0; | ||
169 | int ret = 0; | ||
170 | |||
171 | for (i = 0; i < rs->md.raid_disks; i++, argv += 2) { | ||
172 | rs->dev[i].rdev.raid_disk = i; | ||
173 | |||
174 | rs->dev[i].meta_dev = NULL; | ||
175 | rs->dev[i].data_dev = NULL; | ||
176 | |||
177 | /* | ||
178 | * There are no offsets, since there is a separate device | ||
179 | * for data and metadata. | ||
180 | */ | ||
181 | rs->dev[i].rdev.data_offset = 0; | ||
182 | rs->dev[i].rdev.mddev = &rs->md; | ||
183 | |||
184 | if (strcmp(argv[0], "-")) { | ||
185 | rs->ti->error = "Metadata devices not supported"; | ||
186 | return -EINVAL; | ||
187 | } | ||
188 | |||
189 | if (!strcmp(argv[1], "-")) { | ||
190 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags) && | ||
191 | (!rs->dev[i].rdev.recovery_offset)) { | ||
192 | rs->ti->error = "Drive designated for rebuild not specified"; | ||
193 | return -EINVAL; | ||
194 | } | ||
195 | |||
196 | continue; | ||
197 | } | ||
198 | |||
199 | ret = dm_get_device(rs->ti, argv[1], | ||
200 | dm_table_get_mode(rs->ti->table), | ||
201 | &rs->dev[i].data_dev); | ||
202 | if (ret) { | ||
203 | rs->ti->error = "RAID device lookup failure"; | ||
204 | return ret; | ||
205 | } | ||
206 | |||
207 | rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; | ||
208 | list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); | ||
209 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) | ||
210 | rebuild++; | ||
211 | } | ||
212 | |||
213 | if (metadata_available) { | ||
214 | rs->md.external = 0; | ||
215 | rs->md.persistent = 1; | ||
216 | rs->md.major_version = 2; | ||
217 | } else if (rebuild && !rs->md.recovery_cp) { | ||
218 | /* | ||
219 | * Without metadata, we will not be able to tell if the array | ||
220 | * is in-sync or not - we must assume it is not. Therefore, | ||
221 | * it is impossible to rebuild a drive. | ||
222 | * | ||
223 | * Even if there is metadata, the on-disk information may | ||
224 | * indicate that the array is not in-sync and it will then | ||
225 | * fail at that time. | ||
226 | * | ||
227 | * User could specify 'nosync' option if desperate. | ||
228 | */ | ||
229 | DMERR("Unable to rebuild drive while array is not in-sync"); | ||
230 | rs->ti->error = "RAID device lookup failure"; | ||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Possible arguments are... | ||
239 | * RAID456: | ||
240 | * <chunk_size> [optional_args] | ||
241 | * | ||
242 | * Optional args: | ||
243 | * [[no]sync] Force or prevent recovery of the entire array | ||
244 | * [rebuild <idx>] Rebuild the drive indicated by the index | ||
245 | * [daemon_sleep <ms>] Time between bitmap daemon work to clear bits | ||
246 | * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization | ||
247 | * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization | ||
248 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | ||
249 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | ||
250 | */ | ||
251 | static int parse_raid_params(struct raid_set *rs, char **argv, | ||
252 | unsigned num_raid_params) | ||
253 | { | ||
254 | unsigned i, rebuild_cnt = 0; | ||
255 | unsigned long value; | ||
256 | char *key; | ||
257 | |||
258 | /* | ||
259 | * First, parse the in-order required arguments | ||
260 | */ | ||
261 | if ((strict_strtoul(argv[0], 10, &value) < 0) || | ||
262 | !is_power_of_2(value) || (value < 8)) { | ||
263 | rs->ti->error = "Bad chunk size"; | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | |||
267 | rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; | ||
268 | argv++; | ||
269 | num_raid_params--; | ||
270 | |||
271 | /* | ||
272 | * Second, parse the unordered optional arguments | ||
273 | */ | ||
274 | for (i = 0; i < rs->md.raid_disks; i++) | ||
275 | set_bit(In_sync, &rs->dev[i].rdev.flags); | ||
276 | |||
277 | for (i = 0; i < num_raid_params; i++) { | ||
278 | if (!strcmp(argv[i], "nosync")) { | ||
279 | rs->md.recovery_cp = MaxSector; | ||
280 | rs->print_flags |= DMPF_NOSYNC; | ||
281 | rs->md.flags |= MD_SYNC_STATE_FORCED; | ||
282 | continue; | ||
283 | } | ||
284 | if (!strcmp(argv[i], "sync")) { | ||
285 | rs->md.recovery_cp = 0; | ||
286 | rs->print_flags |= DMPF_SYNC; | ||
287 | rs->md.flags |= MD_SYNC_STATE_FORCED; | ||
288 | continue; | ||
289 | } | ||
290 | |||
291 | /* The rest of the optional arguments come in key/value pairs */ | ||
292 | if ((i + 1) >= num_raid_params) { | ||
293 | rs->ti->error = "Wrong number of raid parameters given"; | ||
294 | return -EINVAL; | ||
295 | } | ||
296 | |||
297 | key = argv[i++]; | ||
298 | if (strict_strtoul(argv[i], 10, &value) < 0) { | ||
299 | rs->ti->error = "Bad numerical argument given in raid params"; | ||
300 | return -EINVAL; | ||
301 | } | ||
302 | |||
303 | if (!strcmp(key, "rebuild")) { | ||
304 | if (++rebuild_cnt > rs->raid_type->parity_devs) { | ||
305 | rs->ti->error = "Too many rebuild drives given"; | ||
306 | return -EINVAL; | ||
307 | } | ||
308 | if (value > rs->md.raid_disks) { | ||
309 | rs->ti->error = "Invalid rebuild index given"; | ||
310 | return -EINVAL; | ||
311 | } | ||
312 | clear_bit(In_sync, &rs->dev[value].rdev.flags); | ||
313 | rs->dev[value].rdev.recovery_offset = 0; | ||
314 | } else if (!strcmp(key, "max_write_behind")) { | ||
315 | rs->print_flags |= DMPF_MAX_WRITE_BEHIND; | ||
316 | |||
317 | /* | ||
318 | * In device-mapper, we specify things in sectors, but | ||
319 | * MD records this value in kB | ||
320 | */ | ||
321 | value /= 2; | ||
322 | if (value > COUNTER_MAX) { | ||
323 | rs->ti->error = "Max write-behind limit out of range"; | ||
324 | return -EINVAL; | ||
325 | } | ||
326 | rs->md.bitmap_info.max_write_behind = value; | ||
327 | } else if (!strcmp(key, "daemon_sleep")) { | ||
328 | rs->print_flags |= DMPF_DAEMON_SLEEP; | ||
329 | if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { | ||
330 | rs->ti->error = "daemon sleep period out of range"; | ||
331 | return -EINVAL; | ||
332 | } | ||
333 | rs->md.bitmap_info.daemon_sleep = value; | ||
334 | } else if (!strcmp(key, "stripe_cache")) { | ||
335 | rs->print_flags |= DMPF_STRIPE_CACHE; | ||
336 | |||
337 | /* | ||
338 | * In device-mapper, we specify things in sectors, but | ||
339 | * MD records this value in kB | ||
340 | */ | ||
341 | value /= 2; | ||
342 | |||
343 | if (rs->raid_type->level < 5) { | ||
344 | rs->ti->error = "Inappropriate argument: stripe_cache"; | ||
345 | return -EINVAL; | ||
346 | } | ||
347 | if (raid5_set_cache_size(&rs->md, (int)value)) { | ||
348 | rs->ti->error = "Bad stripe_cache size"; | ||
349 | return -EINVAL; | ||
350 | } | ||
351 | } else if (!strcmp(key, "min_recovery_rate")) { | ||
352 | rs->print_flags |= DMPF_MIN_RECOVERY_RATE; | ||
353 | if (value > INT_MAX) { | ||
354 | rs->ti->error = "min_recovery_rate out of range"; | ||
355 | return -EINVAL; | ||
356 | } | ||
357 | rs->md.sync_speed_min = (int)value; | ||
358 | } else if (!strcmp(key, "max_recovery_rate")) { | ||
359 | rs->print_flags |= DMPF_MAX_RECOVERY_RATE; | ||
360 | if (value > INT_MAX) { | ||
361 | rs->ti->error = "max_recovery_rate out of range"; | ||
362 | return -EINVAL; | ||
363 | } | ||
364 | rs->md.sync_speed_max = (int)value; | ||
365 | } else { | ||
366 | DMERR("Unable to parse RAID parameter: %s", key); | ||
367 | rs->ti->error = "Unable to parse RAID parameters"; | ||
368 | return -EINVAL; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* Assume there are no metadata devices until the drives are parsed */ | ||
373 | rs->md.persistent = 0; | ||
374 | rs->md.external = 1; | ||
375 | |||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | static void do_table_event(struct work_struct *ws) | ||
380 | { | ||
381 | struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); | ||
382 | |||
383 | dm_table_event(rs->ti->table); | ||
384 | } | ||
385 | |||
386 | static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | ||
387 | { | ||
388 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | ||
389 | |||
390 | return md_raid5_congested(&rs->md, bits); | ||
391 | } | ||
392 | |||
393 | static void raid_unplug(struct dm_target_callbacks *cb) | ||
394 | { | ||
395 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | ||
396 | |||
397 | md_raid5_unplug_device(rs->md.private); | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * Construct a RAID4/5/6 mapping: | ||
402 | * Args: | ||
403 | * <raid_type> <#raid_params> <raid_params> \ | ||
404 | * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } | ||
405 | * | ||
406 | * ** metadata devices are not supported yet, use '-' instead ** | ||
407 | * | ||
408 | * <raid_params> varies by <raid_type>. See 'parse_raid_params' for | ||
409 | * details on possible <raid_params>. | ||
410 | */ | ||
411 | static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | ||
412 | { | ||
413 | int ret; | ||
414 | struct raid_type *rt; | ||
415 | unsigned long num_raid_params, num_raid_devs; | ||
416 | struct raid_set *rs = NULL; | ||
417 | |||
418 | /* Must have at least <raid_type> <#raid_params> */ | ||
419 | if (argc < 2) { | ||
420 | ti->error = "Too few arguments"; | ||
421 | return -EINVAL; | ||
422 | } | ||
423 | |||
424 | /* raid type */ | ||
425 | rt = get_raid_type(argv[0]); | ||
426 | if (!rt) { | ||
427 | ti->error = "Unrecognised raid_type"; | ||
428 | return -EINVAL; | ||
429 | } | ||
430 | argc--; | ||
431 | argv++; | ||
432 | |||
433 | /* number of RAID parameters */ | ||
434 | if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) { | ||
435 | ti->error = "Cannot understand number of RAID parameters"; | ||
436 | return -EINVAL; | ||
437 | } | ||
438 | argc--; | ||
439 | argv++; | ||
440 | |||
441 | /* Skip over RAID params for now and find out # of devices */ | ||
442 | if (num_raid_params + 1 > argc) { | ||
443 | ti->error = "Arguments do not agree with counts given"; | ||
444 | return -EINVAL; | ||
445 | } | ||
446 | |||
447 | if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) || | ||
448 | (num_raid_devs >= INT_MAX)) { | ||
449 | ti->error = "Cannot understand number of raid devices"; | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | |||
453 | rs = context_alloc(ti, rt, (unsigned)num_raid_devs); | ||
454 | if (IS_ERR(rs)) | ||
455 | return PTR_ERR(rs); | ||
456 | |||
457 | ret = parse_raid_params(rs, argv, (unsigned)num_raid_params); | ||
458 | if (ret) | ||
459 | goto bad; | ||
460 | |||
461 | ret = -EINVAL; | ||
462 | |||
463 | argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ | ||
464 | argv += num_raid_params + 1; | ||
465 | |||
466 | if (argc != (num_raid_devs * 2)) { | ||
467 | ti->error = "Supplied RAID devices does not match the count given"; | ||
468 | goto bad; | ||
469 | } | ||
470 | |||
471 | ret = dev_parms(rs, argv); | ||
472 | if (ret) | ||
473 | goto bad; | ||
474 | |||
475 | INIT_WORK(&rs->md.event_work, do_table_event); | ||
476 | ti->split_io = rs->md.chunk_sectors; | ||
477 | ti->private = rs; | ||
478 | |||
479 | mutex_lock(&rs->md.reconfig_mutex); | ||
480 | ret = md_run(&rs->md); | ||
481 | rs->md.in_sync = 0; /* Assume already marked dirty */ | ||
482 | mutex_unlock(&rs->md.reconfig_mutex); | ||
483 | |||
484 | if (ret) { | ||
485 | ti->error = "Fail to run raid array"; | ||
486 | goto bad; | ||
487 | } | ||
488 | |||
489 | rs->callbacks.congested_fn = raid_is_congested; | ||
490 | rs->callbacks.unplug_fn = raid_unplug; | ||
491 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | ||
492 | |||
493 | return 0; | ||
494 | |||
495 | bad: | ||
496 | context_free(rs); | ||
497 | |||
498 | return ret; | ||
499 | } | ||
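[Note] Putting the constructor comment and parse_raid_params() together: a table line supplies the raid type, the ordered chunk size followed by optional key/value parameters, the device count, and then one metadata/data pair per device with '-' in the metadata slot (metadata devices are not supported yet). A hypothetical example, with the length and device numbers invented for illustration:

	0 6291456 raid raid5_la 1 128 4 - 8:17 - 8:33 - 8:49 - 8:65

This maps 6291456 sectors as left-asymmetric RAID5 with a single raid parameter (a 128-sector, i.e. 64KiB, chunk) across four devices, so ti->len must divide evenly by the three data devices; each '-' is the placeholder metadata device in front of its data device.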
500 | |||
501 | static void raid_dtr(struct dm_target *ti) | ||
502 | { | ||
503 | struct raid_set *rs = ti->private; | ||
504 | |||
505 | list_del_init(&rs->callbacks.list); | ||
506 | md_stop(&rs->md); | ||
507 | context_free(rs); | ||
508 | } | ||
509 | |||
510 | static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) | ||
511 | { | ||
512 | struct raid_set *rs = ti->private; | ||
513 | mddev_t *mddev = &rs->md; | ||
514 | |||
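	/*
	 * Hand the bio straight to the md raid456 personality; as far as
	 * device-mapper is concerned the bio has been submitted.
	 */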
515 | mddev->pers->make_request(mddev, bio); | ||
516 | |||
517 | return DM_MAPIO_SUBMITTED; | ||
518 | } | ||
519 | |||
520 | static int raid_status(struct dm_target *ti, status_type_t type, | ||
521 | char *result, unsigned maxlen) | ||
522 | { | ||
523 | struct raid_set *rs = ti->private; | ||
524 | unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ | ||
525 | unsigned sz = 0; | ||
526 | int i; | ||
527 | sector_t sync; | ||
528 | |||
529 | switch (type) { | ||
530 | case STATUSTYPE_INFO: | ||
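		/*
		 * INFO line format: <raid_type> <#devices> <health chars> <resync>/<total>,
		 * where each device is reported as 'A' (in sync), 'a' (present but
		 * not yet in sync) or 'D' (faulty), e.g. "raid5_la 3 AAA 122880/244736"
		 * (numbers illustrative).
		 */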
531 | DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks); | ||
532 | |||
533 | for (i = 0; i < rs->md.raid_disks; i++) { | ||
534 | if (test_bit(Faulty, &rs->dev[i].rdev.flags)) | ||
535 | DMEMIT("D"); | ||
536 | else if (test_bit(In_sync, &rs->dev[i].rdev.flags)) | ||
537 | DMEMIT("A"); | ||
538 | else | ||
539 | DMEMIT("a"); | ||
540 | } | ||
541 | |||
542 | if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery)) | ||
543 | sync = rs->md.curr_resync_completed; | ||
544 | else | ||
545 | sync = rs->md.recovery_cp; | ||
546 | |||
547 | if (sync > rs->md.resync_max_sectors) | ||
548 | sync = rs->md.resync_max_sectors; | ||
549 | |||
550 | DMEMIT(" %llu/%llu", | ||
551 | (unsigned long long) sync, | ||
552 | (unsigned long long) rs->md.resync_max_sectors); | ||
553 | |||
554 | break; | ||
555 | case STATUSTYPE_TABLE: | ||
556 | /* The string you would use to construct this array */ | ||
557 | for (i = 0; i < rs->md.raid_disks; i++) | ||
558 | if (rs->dev[i].data_dev && | ||
559 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | ||
560 | raid_param_cnt++; /* for rebuilds */ | ||
561 | |||
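		/*
		 * Each set print flag normally contributes a keyword plus a
		 * value (hence the * 2); "sync"/"nosync" are bare keywords,
		 * so drop one from the count.
		 */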
562 | raid_param_cnt += (hweight64(rs->print_flags) * 2); | ||
563 | if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) | ||
564 | raid_param_cnt--; | ||
565 | |||
566 | DMEMIT("%s %u %u", rs->raid_type->name, | ||
567 | raid_param_cnt, rs->md.chunk_sectors); | ||
568 | |||
569 | if ((rs->print_flags & DMPF_SYNC) && | ||
570 | (rs->md.recovery_cp == MaxSector)) | ||
571 | DMEMIT(" sync"); | ||
572 | if (rs->print_flags & DMPF_NOSYNC) | ||
573 | DMEMIT(" nosync"); | ||
574 | |||
575 | for (i = 0; i < rs->md.raid_disks; i++) | ||
576 | if (rs->dev[i].data_dev && | ||
577 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | ||
578 | DMEMIT(" rebuild %u", i); | ||
579 | |||
580 | if (rs->print_flags & DMPF_DAEMON_SLEEP) | ||
581 | DMEMIT(" daemon_sleep %lu", | ||
582 | rs->md.bitmap_info.daemon_sleep); | ||
583 | |||
584 | if (rs->print_flags & DMPF_MIN_RECOVERY_RATE) | ||
585 | DMEMIT(" min_recovery_rate %d", rs->md.sync_speed_min); | ||
586 | |||
587 | if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) | ||
588 | DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); | ||
589 | |||
590 | if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) | ||
591 | DMEMIT(" max_write_behind %lu", | ||
592 | rs->md.bitmap_info.max_write_behind); | ||
593 | |||
594 | if (rs->print_flags & DMPF_STRIPE_CACHE) { | ||
595 | raid5_conf_t *conf = rs->md.private; | ||
596 | |||
597 | /* convert from kiB to sectors */ | ||
598 | DMEMIT(" stripe_cache %d", | ||
599 | conf ? conf->max_nr_stripes * 2 : 0); | ||
600 | } | ||
601 | |||
602 | DMEMIT(" %d", rs->md.raid_disks); | ||
603 | for (i = 0; i < rs->md.raid_disks; i++) { | ||
604 | DMEMIT(" -"); /* metadata device */ | ||
605 | |||
606 | if (rs->dev[i].data_dev) | ||
607 | DMEMIT(" %s", rs->dev[i].data_dev->name); | ||
608 | else | ||
609 | DMEMIT(" -"); | ||
610 | } | ||
611 | } | ||
612 | |||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) | ||
617 | { | ||
618 | struct raid_set *rs = ti->private; | ||
619 | unsigned i; | ||
620 | int ret = 0; | ||
621 | |||
622 | for (i = 0; !ret && i < rs->md.raid_disks; i++) | ||
623 | if (rs->dev[i].data_dev) | ||
624 | ret = fn(ti, | ||
625 | rs->dev[i].data_dev, | ||
626 | 0, /* No offset on data devs */ | ||
627 | rs->md.dev_sectors, | ||
628 | data); | ||
629 | |||
630 | return ret; | ||
631 | } | ||
632 | |||
633 | static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) | ||
634 | { | ||
635 | struct raid_set *rs = ti->private; | ||
636 | unsigned chunk_size = rs->md.chunk_sectors << 9; | ||
637 | raid5_conf_t *conf = rs->md.private; | ||
638 | |||
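	/*
	 * chunk_sectors is in 512-byte sectors; shifting left by 9 converts it
	 * to bytes for the block limits API.  io_opt is a full stripe width:
	 * chunk size times the number of data (non-parity) disks.
	 */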
639 | blk_limits_io_min(limits, chunk_size); | ||
640 | blk_limits_io_opt(limits, chunk_size * (conf->raid_disks - conf->max_degraded)); | ||
641 | } | ||
642 | |||
643 | static void raid_presuspend(struct dm_target *ti) | ||
644 | { | ||
645 | struct raid_set *rs = ti->private; | ||
646 | |||
647 | md_stop_writes(&rs->md); | ||
648 | } | ||
649 | |||
650 | static void raid_postsuspend(struct dm_target *ti) | ||
651 | { | ||
652 | struct raid_set *rs = ti->private; | ||
653 | |||
654 | mddev_suspend(&rs->md); | ||
655 | } | ||
656 | |||
657 | static void raid_resume(struct dm_target *ti) | ||
658 | { | ||
659 | struct raid_set *rs = ti->private; | ||
660 | |||
661 | mddev_resume(&rs->md); | ||
662 | } | ||
663 | |||
664 | static struct target_type raid_target = { | ||
665 | .name = "raid", | ||
666 | .version = {1, 0, 0}, | ||
667 | .module = THIS_MODULE, | ||
668 | .ctr = raid_ctr, | ||
669 | .dtr = raid_dtr, | ||
670 | .map = raid_map, | ||
671 | .status = raid_status, | ||
672 | .iterate_devices = raid_iterate_devices, | ||
673 | .io_hints = raid_io_hints, | ||
674 | .presuspend = raid_presuspend, | ||
675 | .postsuspend = raid_postsuspend, | ||
676 | .resume = raid_resume, | ||
677 | }; | ||
678 | |||
679 | static int __init dm_raid_init(void) | ||
680 | { | ||
681 | return dm_register_target(&raid_target); | ||
682 | } | ||
683 | |||
684 | static void __exit dm_raid_exit(void) | ||
685 | { | ||
686 | dm_unregister_target(&raid_target); | ||
687 | } | ||
688 | |||
689 | module_init(dm_raid_init); | ||
690 | module_exit(dm_raid_exit); | ||
691 | |||
692 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | ||
693 | MODULE_ALIAS("dm-raid4"); | ||
694 | MODULE_ALIAS("dm-raid5"); | ||
695 | MODULE_ALIAS("dm-raid6"); | ||
696 | MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>"); | ||
697 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 19a59b041c27..dee326775c60 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti) | |||
261 | struct dm_io_request io_req = { | 261 | struct dm_io_request io_req = { |
262 | .bi_rw = WRITE_FLUSH, | 262 | .bi_rw = WRITE_FLUSH, |
263 | .mem.type = DM_IO_KMEM, | 263 | .mem.type = DM_IO_KMEM, |
264 | .mem.ptr.bvec = NULL, | 264 | .mem.ptr.addr = NULL, |
265 | .client = ms->io_client, | 265 | .client = ms->io_client, |
266 | }; | 266 | }; |
267 | 267 | ||
@@ -637,6 +637,12 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
637 | .client = ms->io_client, | 637 | .client = ms->io_client, |
638 | }; | 638 | }; |
639 | 639 | ||
640 | if (bio->bi_rw & REQ_DISCARD) { | ||
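	/* Discards carry no data payload; hand dm-io a NULL KMEM pointer. */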
641 | io_req.bi_rw |= REQ_DISCARD; | ||
642 | io_req.mem.type = DM_IO_KMEM; | ||
643 | io_req.mem.ptr.addr = NULL; | ||
644 | } | ||
645 | |||
640 | for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) | 646 | for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) |
641 | map_region(dest++, m, bio); | 647 | map_region(dest++, m, bio); |
642 | 648 | ||
@@ -670,7 +676,8 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
670 | bio_list_init(&requeue); | 676 | bio_list_init(&requeue); |
671 | 677 | ||
672 | while ((bio = bio_list_pop(writes))) { | 678 | while ((bio = bio_list_pop(writes))) { |
673 | if (bio->bi_rw & REQ_FLUSH) { | 679 | if ((bio->bi_rw & REQ_FLUSH) || |
680 | (bio->bi_rw & REQ_DISCARD)) { | ||
674 | bio_list_add(&sync, bio); | 681 | bio_list_add(&sync, bio); |
675 | continue; | 682 | continue; |
676 | } | 683 | } |
@@ -1076,8 +1083,10 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1076 | ti->private = ms; | 1083 | ti->private = ms; |
1077 | ti->split_io = dm_rh_get_region_size(ms->rh); | 1084 | ti->split_io = dm_rh_get_region_size(ms->rh); |
1078 | ti->num_flush_requests = 1; | 1085 | ti->num_flush_requests = 1; |
1086 | ti->num_discard_requests = 1; | ||
1079 | 1087 | ||
1080 | ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); | 1088 | ms->kmirrord_wq = alloc_workqueue("kmirrord", |
1089 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); | ||
1081 | if (!ms->kmirrord_wq) { | 1090 | if (!ms->kmirrord_wq) { |
1082 | DMERR("couldn't start kmirrord"); | 1091 | DMERR("couldn't start kmirrord"); |
1083 | r = -ENOMEM; | 1092 | r = -ENOMEM; |
@@ -1130,7 +1139,7 @@ static void mirror_dtr(struct dm_target *ti) | |||
1130 | 1139 | ||
1131 | del_timer_sync(&ms->timer); | 1140 | del_timer_sync(&ms->timer); |
1132 | flush_workqueue(ms->kmirrord_wq); | 1141 | flush_workqueue(ms->kmirrord_wq); |
1133 | flush_scheduled_work(); | 1142 | flush_work_sync(&ms->trigger_event); |
1134 | dm_kcopyd_client_destroy(ms->kcopyd_client); | 1143 | dm_kcopyd_client_destroy(ms->kcopyd_client); |
1135 | destroy_workqueue(ms->kmirrord_wq); | 1144 | destroy_workqueue(ms->kmirrord_wq); |
1136 | free_context(ms, ti, ms->nr_mirrors); | 1145 | free_context(ms, ti, ms->nr_mirrors); |
@@ -1406,7 +1415,7 @@ static int mirror_iterate_devices(struct dm_target *ti, | |||
1406 | 1415 | ||
1407 | static struct target_type mirror_target = { | 1416 | static struct target_type mirror_target = { |
1408 | .name = "mirror", | 1417 | .name = "mirror", |
1409 | .version = {1, 12, 0}, | 1418 | .version = {1, 12, 1}, |
1410 | .module = THIS_MODULE, | 1419 | .module = THIS_MODULE, |
1411 | .ctr = mirror_ctr, | 1420 | .ctr = mirror_ctr, |
1412 | .dtr = mirror_dtr, | 1421 | .dtr = mirror_dtr, |
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 2129cdb115dc..95891dfcbca0 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, | |||
256 | */ | 256 | */ |
257 | INIT_WORK_ONSTACK(&req.work, do_metadata); | 257 | INIT_WORK_ONSTACK(&req.work, do_metadata); |
258 | queue_work(ps->metadata_wq, &req.work); | 258 | queue_work(ps->metadata_wq, &req.work); |
259 | flush_workqueue(ps->metadata_wq); | 259 | flush_work(&req.work); |
260 | 260 | ||
261 | return req.result; | 261 | return req.result; |
262 | } | 262 | } |
@@ -818,7 +818,7 @@ static int persistent_ctr(struct dm_exception_store *store, | |||
818 | atomic_set(&ps->pending_count, 0); | 818 | atomic_set(&ps->pending_count, 0); |
819 | ps->callbacks = NULL; | 819 | ps->callbacks = NULL; |
820 | 820 | ||
821 | ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); | 821 | ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0); |
822 | if (!ps->metadata_wq) { | 822 | if (!ps->metadata_wq) { |
823 | kfree(ps); | 823 | kfree(ps); |
824 | DMERR("couldn't start header metadata update thread"); | 824 | DMERR("couldn't start header metadata update thread"); |
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 53cf79d8bcbc..fdde53cd12b7 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
20 | #include <linux/log2.h> | 20 | #include <linux/log2.h> |
21 | #include <linux/dm-kcopyd.h> | 21 | #include <linux/dm-kcopyd.h> |
22 | #include <linux/workqueue.h> | ||
23 | 22 | ||
24 | #include "dm-exception-store.h" | 23 | #include "dm-exception-store.h" |
25 | 24 | ||
@@ -80,9 +79,6 @@ struct dm_snapshot { | |||
80 | /* Origin writes don't trigger exceptions until this is set */ | 79 | /* Origin writes don't trigger exceptions until this is set */ |
81 | int active; | 80 | int active; |
82 | 81 | ||
83 | /* Whether or not owning mapped_device is suspended */ | ||
84 | int suspended; | ||
85 | |||
86 | atomic_t pending_exceptions_count; | 82 | atomic_t pending_exceptions_count; |
87 | 83 | ||
88 | mempool_t *pending_pool; | 84 | mempool_t *pending_pool; |
@@ -106,10 +102,6 @@ struct dm_snapshot { | |||
106 | 102 | ||
107 | struct dm_kcopyd_client *kcopyd_client; | 103 | struct dm_kcopyd_client *kcopyd_client; |
108 | 104 | ||
109 | /* Queue of snapshot writes for ksnapd to flush */ | ||
110 | struct bio_list queued_bios; | ||
111 | struct work_struct queued_bios_work; | ||
112 | |||
113 | /* Wait for events based on state_bits */ | 105 | /* Wait for events based on state_bits */ |
114 | unsigned long state_bits; | 106 | unsigned long state_bits; |
115 | 107 | ||
@@ -160,9 +152,6 @@ struct dm_dev *dm_snap_cow(struct dm_snapshot *s) | |||
160 | } | 152 | } |
161 | EXPORT_SYMBOL(dm_snap_cow); | 153 | EXPORT_SYMBOL(dm_snap_cow); |
162 | 154 | ||
163 | static struct workqueue_struct *ksnapd; | ||
164 | static void flush_queued_bios(struct work_struct *work); | ||
165 | |||
166 | static sector_t chunk_to_sector(struct dm_exception_store *store, | 155 | static sector_t chunk_to_sector(struct dm_exception_store *store, |
167 | chunk_t chunk) | 156 | chunk_t chunk) |
168 | { | 157 | { |
@@ -1110,7 +1099,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1110 | s->ti = ti; | 1099 | s->ti = ti; |
1111 | s->valid = 1; | 1100 | s->valid = 1; |
1112 | s->active = 0; | 1101 | s->active = 0; |
1113 | s->suspended = 0; | ||
1114 | atomic_set(&s->pending_exceptions_count, 0); | 1102 | atomic_set(&s->pending_exceptions_count, 0); |
1115 | init_rwsem(&s->lock); | 1103 | init_rwsem(&s->lock); |
1116 | INIT_LIST_HEAD(&s->list); | 1104 | INIT_LIST_HEAD(&s->list); |
@@ -1153,9 +1141,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1153 | 1141 | ||
1154 | spin_lock_init(&s->tracked_chunk_lock); | 1142 | spin_lock_init(&s->tracked_chunk_lock); |
1155 | 1143 | ||
1156 | bio_list_init(&s->queued_bios); | ||
1157 | INIT_WORK(&s->queued_bios_work, flush_queued_bios); | ||
1158 | |||
1159 | ti->private = s; | 1144 | ti->private = s; |
1160 | ti->num_flush_requests = num_flush_requests; | 1145 | ti->num_flush_requests = num_flush_requests; |
1161 | 1146 | ||
@@ -1279,8 +1264,6 @@ static void snapshot_dtr(struct dm_target *ti) | |||
1279 | struct dm_snapshot *s = ti->private; | 1264 | struct dm_snapshot *s = ti->private; |
1280 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; | 1265 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; |
1281 | 1266 | ||
1282 | flush_workqueue(ksnapd); | ||
1283 | |||
1284 | down_read(&_origins_lock); | 1267 | down_read(&_origins_lock); |
1285 | /* Check whether exception handover must be cancelled */ | 1268 | /* Check whether exception handover must be cancelled */ |
1286 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); | 1269 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); |
@@ -1342,20 +1325,6 @@ static void flush_bios(struct bio *bio) | |||
1342 | } | 1325 | } |
1343 | } | 1326 | } |
1344 | 1327 | ||
1345 | static void flush_queued_bios(struct work_struct *work) | ||
1346 | { | ||
1347 | struct dm_snapshot *s = | ||
1348 | container_of(work, struct dm_snapshot, queued_bios_work); | ||
1349 | struct bio *queued_bios; | ||
1350 | unsigned long flags; | ||
1351 | |||
1352 | spin_lock_irqsave(&s->pe_lock, flags); | ||
1353 | queued_bios = bio_list_get(&s->queued_bios); | ||
1354 | spin_unlock_irqrestore(&s->pe_lock, flags); | ||
1355 | |||
1356 | flush_bios(queued_bios); | ||
1357 | } | ||
1358 | |||
1359 | static int do_origin(struct dm_dev *origin, struct bio *bio); | 1328 | static int do_origin(struct dm_dev *origin, struct bio *bio); |
1360 | 1329 | ||
1361 | /* | 1330 | /* |
@@ -1760,15 +1729,6 @@ static void snapshot_merge_presuspend(struct dm_target *ti) | |||
1760 | stop_merge(s); | 1729 | stop_merge(s); |
1761 | } | 1730 | } |
1762 | 1731 | ||
1763 | static void snapshot_postsuspend(struct dm_target *ti) | ||
1764 | { | ||
1765 | struct dm_snapshot *s = ti->private; | ||
1766 | |||
1767 | down_write(&s->lock); | ||
1768 | s->suspended = 1; | ||
1769 | up_write(&s->lock); | ||
1770 | } | ||
1771 | |||
1772 | static int snapshot_preresume(struct dm_target *ti) | 1732 | static int snapshot_preresume(struct dm_target *ti) |
1773 | { | 1733 | { |
1774 | int r = 0; | 1734 | int r = 0; |
@@ -1783,7 +1743,7 @@ static int snapshot_preresume(struct dm_target *ti) | |||
1783 | DMERR("Unable to resume snapshot source until " | 1743 | DMERR("Unable to resume snapshot source until " |
1784 | "handover completes."); | 1744 | "handover completes."); |
1785 | r = -EINVAL; | 1745 | r = -EINVAL; |
1786 | } else if (!snap_src->suspended) { | 1746 | } else if (!dm_suspended(snap_src->ti)) { |
1787 | DMERR("Unable to perform snapshot handover until " | 1747 | DMERR("Unable to perform snapshot handover until " |
1788 | "source is suspended."); | 1748 | "source is suspended."); |
1789 | r = -EINVAL; | 1749 | r = -EINVAL; |
@@ -1816,7 +1776,6 @@ static void snapshot_resume(struct dm_target *ti) | |||
1816 | 1776 | ||
1817 | down_write(&s->lock); | 1777 | down_write(&s->lock); |
1818 | s->active = 1; | 1778 | s->active = 1; |
1819 | s->suspended = 0; | ||
1820 | up_write(&s->lock); | 1779 | up_write(&s->lock); |
1821 | } | 1780 | } |
1822 | 1781 | ||
@@ -2194,7 +2153,7 @@ static int origin_iterate_devices(struct dm_target *ti, | |||
2194 | 2153 | ||
2195 | static struct target_type origin_target = { | 2154 | static struct target_type origin_target = { |
2196 | .name = "snapshot-origin", | 2155 | .name = "snapshot-origin", |
2197 | .version = {1, 7, 0}, | 2156 | .version = {1, 7, 1}, |
2198 | .module = THIS_MODULE, | 2157 | .module = THIS_MODULE, |
2199 | .ctr = origin_ctr, | 2158 | .ctr = origin_ctr, |
2200 | .dtr = origin_dtr, | 2159 | .dtr = origin_dtr, |
@@ -2207,13 +2166,12 @@ static struct target_type origin_target = { | |||
2207 | 2166 | ||
2208 | static struct target_type snapshot_target = { | 2167 | static struct target_type snapshot_target = { |
2209 | .name = "snapshot", | 2168 | .name = "snapshot", |
2210 | .version = {1, 9, 0}, | 2169 | .version = {1, 10, 0}, |
2211 | .module = THIS_MODULE, | 2170 | .module = THIS_MODULE, |
2212 | .ctr = snapshot_ctr, | 2171 | .ctr = snapshot_ctr, |
2213 | .dtr = snapshot_dtr, | 2172 | .dtr = snapshot_dtr, |
2214 | .map = snapshot_map, | 2173 | .map = snapshot_map, |
2215 | .end_io = snapshot_end_io, | 2174 | .end_io = snapshot_end_io, |
2216 | .postsuspend = snapshot_postsuspend, | ||
2217 | .preresume = snapshot_preresume, | 2175 | .preresume = snapshot_preresume, |
2218 | .resume = snapshot_resume, | 2176 | .resume = snapshot_resume, |
2219 | .status = snapshot_status, | 2177 | .status = snapshot_status, |
@@ -2222,14 +2180,13 @@ static struct target_type snapshot_target = { | |||
2222 | 2180 | ||
2223 | static struct target_type merge_target = { | 2181 | static struct target_type merge_target = { |
2224 | .name = dm_snapshot_merge_target_name, | 2182 | .name = dm_snapshot_merge_target_name, |
2225 | .version = {1, 0, 0}, | 2183 | .version = {1, 1, 0}, |
2226 | .module = THIS_MODULE, | 2184 | .module = THIS_MODULE, |
2227 | .ctr = snapshot_ctr, | 2185 | .ctr = snapshot_ctr, |
2228 | .dtr = snapshot_dtr, | 2186 | .dtr = snapshot_dtr, |
2229 | .map = snapshot_merge_map, | 2187 | .map = snapshot_merge_map, |
2230 | .end_io = snapshot_end_io, | 2188 | .end_io = snapshot_end_io, |
2231 | .presuspend = snapshot_merge_presuspend, | 2189 | .presuspend = snapshot_merge_presuspend, |
2232 | .postsuspend = snapshot_postsuspend, | ||
2233 | .preresume = snapshot_preresume, | 2190 | .preresume = snapshot_preresume, |
2234 | .resume = snapshot_merge_resume, | 2191 | .resume = snapshot_merge_resume, |
2235 | .status = snapshot_status, | 2192 | .status = snapshot_status, |
@@ -2291,17 +2248,8 @@ static int __init dm_snapshot_init(void) | |||
2291 | goto bad_tracked_chunk_cache; | 2248 | goto bad_tracked_chunk_cache; |
2292 | } | 2249 | } |
2293 | 2250 | ||
2294 | ksnapd = create_singlethread_workqueue("ksnapd"); | ||
2295 | if (!ksnapd) { | ||
2296 | DMERR("Failed to create ksnapd workqueue."); | ||
2297 | r = -ENOMEM; | ||
2298 | goto bad_pending_pool; | ||
2299 | } | ||
2300 | |||
2301 | return 0; | 2251 | return 0; |
2302 | 2252 | ||
2303 | bad_pending_pool: | ||
2304 | kmem_cache_destroy(tracked_chunk_cache); | ||
2305 | bad_tracked_chunk_cache: | 2253 | bad_tracked_chunk_cache: |
2306 | kmem_cache_destroy(pending_cache); | 2254 | kmem_cache_destroy(pending_cache); |
2307 | bad_pending_cache: | 2255 | bad_pending_cache: |
@@ -2322,8 +2270,6 @@ bad_register_snapshot_target: | |||
2322 | 2270 | ||
2323 | static void __exit dm_snapshot_exit(void) | 2271 | static void __exit dm_snapshot_exit(void) |
2324 | { | 2272 | { |
2325 | destroy_workqueue(ksnapd); | ||
2326 | |||
2327 | dm_unregister_target(&snapshot_target); | 2273 | dm_unregister_target(&snapshot_target); |
2328 | dm_unregister_target(&origin_target); | 2274 | dm_unregister_target(&origin_target); |
2329 | dm_unregister_target(&merge_target); | 2275 | dm_unregister_target(&merge_target); |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index f0371b4c4fbf..dddfa14f2982 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -39,23 +39,20 @@ struct stripe_c { | |||
39 | struct dm_target *ti; | 39 | struct dm_target *ti; |
40 | 40 | ||
41 | /* Work struct used for triggering events*/ | 41 | /* Work struct used for triggering events*/ |
42 | struct work_struct kstriped_ws; | 42 | struct work_struct trigger_event; |
43 | 43 | ||
44 | struct stripe stripe[0]; | 44 | struct stripe stripe[0]; |
45 | }; | 45 | }; |
46 | 46 | ||
47 | static struct workqueue_struct *kstriped; | ||
48 | |||
49 | /* | 47 | /* |
50 | * An event is triggered whenever a drive | 48 | * An event is triggered whenever a drive |
51 | * drops out of a stripe volume. | 49 | * drops out of a stripe volume. |
52 | */ | 50 | */ |
53 | static void trigger_event(struct work_struct *work) | 51 | static void trigger_event(struct work_struct *work) |
54 | { | 52 | { |
55 | struct stripe_c *sc = container_of(work, struct stripe_c, kstriped_ws); | 53 | struct stripe_c *sc = container_of(work, struct stripe_c, |
56 | 54 | trigger_event); | |
57 | dm_table_event(sc->ti->table); | 55 | dm_table_event(sc->ti->table); |
58 | |||
59 | } | 56 | } |
60 | 57 | ||
61 | static inline struct stripe_c *alloc_context(unsigned int stripes) | 58 | static inline struct stripe_c *alloc_context(unsigned int stripes) |
@@ -160,7 +157,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
160 | return -ENOMEM; | 157 | return -ENOMEM; |
161 | } | 158 | } |
162 | 159 | ||
163 | INIT_WORK(&sc->kstriped_ws, trigger_event); | 160 | INIT_WORK(&sc->trigger_event, trigger_event); |
164 | 161 | ||
165 | /* Set pointer to dm target; used in trigger_event */ | 162 | /* Set pointer to dm target; used in trigger_event */ |
166 | sc->ti = ti; | 163 | sc->ti = ti; |
@@ -211,7 +208,7 @@ static void stripe_dtr(struct dm_target *ti) | |||
211 | for (i = 0; i < sc->stripes; i++) | 208 | for (i = 0; i < sc->stripes; i++) |
212 | dm_put_device(ti, sc->stripe[i].dev); | 209 | dm_put_device(ti, sc->stripe[i].dev); |
213 | 210 | ||
214 | flush_workqueue(kstriped); | 211 | flush_work_sync(&sc->trigger_event); |
215 | kfree(sc); | 212 | kfree(sc); |
216 | } | 213 | } |
217 | 214 | ||
@@ -367,7 +364,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, | |||
367 | atomic_inc(&(sc->stripe[i].error_count)); | 364 | atomic_inc(&(sc->stripe[i].error_count)); |
368 | if (atomic_read(&(sc->stripe[i].error_count)) < | 365 | if (atomic_read(&(sc->stripe[i].error_count)) < |
369 | DM_IO_ERROR_THRESHOLD) | 366 | DM_IO_ERROR_THRESHOLD) |
370 | queue_work(kstriped, &sc->kstriped_ws); | 367 | schedule_work(&sc->trigger_event); |
371 | } | 368 | } |
372 | 369 | ||
373 | return error; | 370 | return error; |
@@ -401,7 +398,7 @@ static void stripe_io_hints(struct dm_target *ti, | |||
401 | 398 | ||
402 | static struct target_type stripe_target = { | 399 | static struct target_type stripe_target = { |
403 | .name = "striped", | 400 | .name = "striped", |
404 | .version = {1, 3, 0}, | 401 | .version = {1, 3, 1}, |
405 | .module = THIS_MODULE, | 402 | .module = THIS_MODULE, |
406 | .ctr = stripe_ctr, | 403 | .ctr = stripe_ctr, |
407 | .dtr = stripe_dtr, | 404 | .dtr = stripe_dtr, |
@@ -422,20 +419,10 @@ int __init dm_stripe_init(void) | |||
422 | return r; | 419 | return r; |
423 | } | 420 | } |
424 | 421 | ||
425 | kstriped = create_singlethread_workqueue("kstriped"); | ||
426 | if (!kstriped) { | ||
427 | DMERR("failed to create workqueue kstriped"); | ||
428 | dm_unregister_target(&stripe_target); | ||
429 | return -ENOMEM; | ||
430 | } | ||
431 | |||
432 | return r; | 422 | return r; |
433 | } | 423 | } |
434 | 424 | ||
435 | void dm_stripe_exit(void) | 425 | void dm_stripe_exit(void) |
436 | { | 426 | { |
437 | dm_unregister_target(&stripe_target); | 427 | dm_unregister_target(&stripe_target); |
438 | destroy_workqueue(kstriped); | ||
439 | |||
440 | return; | ||
441 | } | 428 | } |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 985c20a4f30e..dffa0ac7c4f0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -71,6 +71,8 @@ struct dm_table { | |||
71 | void *event_context; | 71 | void *event_context; |
72 | 72 | ||
73 | struct dm_md_mempools *mempools; | 73 | struct dm_md_mempools *mempools; |
74 | |||
75 | struct list_head target_callbacks; | ||
74 | }; | 76 | }; |
75 | 77 | ||
76 | /* | 78 | /* |
@@ -204,6 +206,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, | |||
204 | return -ENOMEM; | 206 | return -ENOMEM; |
205 | 207 | ||
206 | INIT_LIST_HEAD(&t->devices); | 208 | INIT_LIST_HEAD(&t->devices); |
209 | INIT_LIST_HEAD(&t->target_callbacks); | ||
207 | atomic_set(&t->holders, 0); | 210 | atomic_set(&t->holders, 0); |
208 | t->discards_supported = 1; | 211 | t->discards_supported = 1; |
209 | 212 | ||
@@ -1225,10 +1228,17 @@ int dm_table_resume_targets(struct dm_table *t) | |||
1225 | return 0; | 1228 | return 0; |
1226 | } | 1229 | } |
1227 | 1230 | ||
1231 | void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb) | ||
1232 | { | ||
1233 | list_add(&cb->list, &t->target_callbacks); | ||
1234 | } | ||
1235 | EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks); | ||
1236 | |||
1228 | int dm_table_any_congested(struct dm_table *t, int bdi_bits) | 1237 | int dm_table_any_congested(struct dm_table *t, int bdi_bits) |
1229 | { | 1238 | { |
1230 | struct dm_dev_internal *dd; | 1239 | struct dm_dev_internal *dd; |
1231 | struct list_head *devices = dm_table_get_devices(t); | 1240 | struct list_head *devices = dm_table_get_devices(t); |
1241 | struct dm_target_callbacks *cb; | ||
1232 | int r = 0; | 1242 | int r = 0; |
1233 | 1243 | ||
1234 | list_for_each_entry(dd, devices, list) { | 1244 | list_for_each_entry(dd, devices, list) { |
@@ -1243,6 +1253,10 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) | |||
1243 | bdevname(dd->dm_dev.bdev, b)); | 1253 | bdevname(dd->dm_dev.bdev, b)); |
1244 | } | 1254 | } |
1245 | 1255 | ||
1256 | list_for_each_entry(cb, &t->target_callbacks, list) | ||
1257 | if (cb->congested_fn) | ||
1258 | r |= cb->congested_fn(cb, bdi_bits); | ||
1259 | |||
1246 | return r; | 1260 | return r; |
1247 | } | 1261 | } |
1248 | 1262 | ||
@@ -1264,6 +1278,7 @@ void dm_table_unplug_all(struct dm_table *t) | |||
1264 | { | 1278 | { |
1265 | struct dm_dev_internal *dd; | 1279 | struct dm_dev_internal *dd; |
1266 | struct list_head *devices = dm_table_get_devices(t); | 1280 | struct list_head *devices = dm_table_get_devices(t); |
1281 | struct dm_target_callbacks *cb; | ||
1267 | 1282 | ||
1268 | list_for_each_entry(dd, devices, list) { | 1283 | list_for_each_entry(dd, devices, list) { |
1269 | struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); | 1284 | struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); |
@@ -1276,6 +1291,10 @@ void dm_table_unplug_all(struct dm_table *t) | |||
1276 | dm_device_name(t->md), | 1291 | dm_device_name(t->md), |
1277 | bdevname(dd->dm_dev.bdev, b)); | 1292 | bdevname(dd->dm_dev.bdev, b)); |
1278 | } | 1293 | } |
1294 | |||
1295 | list_for_each_entry(cb, &t->target_callbacks, list) | ||
1296 | if (cb->unplug_fn) | ||
1297 | cb->unplug_fn(cb); | ||
1279 | } | 1298 | } |
1280 | 1299 | ||
1281 | struct mapped_device *dm_table_get_md(struct dm_table *t) | 1300 | struct mapped_device *dm_table_get_md(struct dm_table *t) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f48a2f359ac4..eaa3af0e0632 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" | 32 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" |
33 | #define DM_COOKIE_LENGTH 24 | 33 | #define DM_COOKIE_LENGTH 24 |
34 | 34 | ||
35 | static DEFINE_MUTEX(dm_mutex); | ||
36 | static const char *_name = DM_NAME; | 35 | static const char *_name = DM_NAME; |
37 | 36 | ||
38 | static unsigned int major = 0; | 37 | static unsigned int major = 0; |
@@ -328,7 +327,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) | |||
328 | { | 327 | { |
329 | struct mapped_device *md; | 328 | struct mapped_device *md; |
330 | 329 | ||
331 | mutex_lock(&dm_mutex); | ||
332 | spin_lock(&_minor_lock); | 330 | spin_lock(&_minor_lock); |
333 | 331 | ||
334 | md = bdev->bd_disk->private_data; | 332 | md = bdev->bd_disk->private_data; |
@@ -346,7 +344,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) | |||
346 | 344 | ||
347 | out: | 345 | out: |
348 | spin_unlock(&_minor_lock); | 346 | spin_unlock(&_minor_lock); |
349 | mutex_unlock(&dm_mutex); | ||
350 | 347 | ||
351 | return md ? 0 : -ENXIO; | 348 | return md ? 0 : -ENXIO; |
352 | } | 349 | } |
@@ -355,10 +352,12 @@ static int dm_blk_close(struct gendisk *disk, fmode_t mode) | |||
355 | { | 352 | { |
356 | struct mapped_device *md = disk->private_data; | 353 | struct mapped_device *md = disk->private_data; |
357 | 354 | ||
358 | mutex_lock(&dm_mutex); | 355 | spin_lock(&_minor_lock); |
356 | |||
359 | atomic_dec(&md->open_count); | 357 | atomic_dec(&md->open_count); |
360 | dm_put(md); | 358 | dm_put(md); |
361 | mutex_unlock(&dm_mutex); | 359 | |
360 | spin_unlock(&_minor_lock); | ||
362 | 361 | ||
363 | return 0; | 362 | return 0; |
364 | } | 363 | } |
@@ -1638,13 +1637,15 @@ static void dm_request_fn(struct request_queue *q) | |||
1638 | if (map_request(ti, clone, md)) | 1637 | if (map_request(ti, clone, md)) |
1639 | goto requeued; | 1638 | goto requeued; |
1640 | 1639 | ||
1641 | spin_lock_irq(q->queue_lock); | 1640 | BUG_ON(!irqs_disabled()); |
1641 | spin_lock(q->queue_lock); | ||
1642 | } | 1642 | } |
1643 | 1643 | ||
1644 | goto out; | 1644 | goto out; |
1645 | 1645 | ||
1646 | requeued: | 1646 | requeued: |
1647 | spin_lock_irq(q->queue_lock); | 1647 | BUG_ON(!irqs_disabled()); |
1648 | spin_lock(q->queue_lock); | ||
1648 | 1649 | ||
1649 | plug_and_out: | 1650 | plug_and_out: |
1650 | if (!elv_queue_empty(q)) | 1651 | if (!elv_queue_empty(q)) |
@@ -1884,7 +1885,8 @@ static struct mapped_device *alloc_dev(int minor) | |||
1884 | add_disk(md->disk); | 1885 | add_disk(md->disk); |
1885 | format_dev_t(md->name, MKDEV(_major, minor)); | 1886 | format_dev_t(md->name, MKDEV(_major, minor)); |
1886 | 1887 | ||
1887 | md->wq = create_singlethread_workqueue("kdmflush"); | 1888 | md->wq = alloc_workqueue("kdmflush", |
1889 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); | ||
1888 | if (!md->wq) | 1890 | if (!md->wq) |
1889 | goto bad_thread; | 1891 | goto bad_thread; |
1890 | 1892 | ||
@@ -1992,13 +1994,14 @@ static void event_callback(void *context) | |||
1992 | wake_up(&md->eventq); | 1994 | wake_up(&md->eventq); |
1993 | } | 1995 | } |
1994 | 1996 | ||
1997 | /* | ||
1998 | * Protected by md->suspend_lock obtained by dm_swap_table(). | ||
1999 | */ | ||
1995 | static void __set_size(struct mapped_device *md, sector_t size) | 2000 | static void __set_size(struct mapped_device *md, sector_t size) |
1996 | { | 2001 | { |
1997 | set_capacity(md->disk, size); | 2002 | set_capacity(md->disk, size); |
1998 | 2003 | ||
1999 | mutex_lock(&md->bdev->bd_inode->i_mutex); | ||
2000 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); | 2004 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
2001 | mutex_unlock(&md->bdev->bd_inode->i_mutex); | ||
2002 | } | 2005 | } |
2003 | 2006 | ||
2004 | /* | 2007 | /* |