aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig23
-rw-r--r--drivers/md/Makefile7
-rw-r--r--drivers/md/dm-emc.c345
-rw-r--r--drivers/md/dm-hw-handler.c213
-rw-r--r--drivers/md/dm-hw-handler.h63
-rw-r--r--drivers/md/dm-mpath-hp-sw.c247
-rw-r--r--drivers/md/dm-mpath-rdac.c700
-rw-r--r--drivers/md/dm-mpath.c163
-rw-r--r--drivers/md/dm-mpath.h1
-rw-r--r--drivers/md/linear.c10
-rw-r--r--drivers/md/raid0.c10
-rw-r--r--drivers/md/raid10.c15
-rw-r--r--drivers/md/raid5.c10
13 files changed, 122 insertions, 1685 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 610af916891e..07d92c11b5d8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -252,27 +252,14 @@ config DM_ZERO
252config DM_MULTIPATH 252config DM_MULTIPATH
253 tristate "Multipath target" 253 tristate "Multipath target"
254 depends on BLK_DEV_DM 254 depends on BLK_DEV_DM
255 # nasty syntax but means make DM_MULTIPATH independent
256 # of SCSI_DH if the latter isn't defined but if
257 # it is, DM_MULTIPATH must depend on it. We get a build
258 # error if SCSI_DH=m and DM_MULTIPATH=y
259 depends on SCSI_DH || !SCSI_DH
255 ---help--- 260 ---help---
256 Allow volume managers to support multipath hardware. 261 Allow volume managers to support multipath hardware.
257 262
258config DM_MULTIPATH_EMC
259 tristate "EMC CX/AX multipath support"
260 depends on DM_MULTIPATH && BLK_DEV_DM
261 ---help---
262 Multipath support for EMC CX/AX series hardware.
263
264config DM_MULTIPATH_RDAC
265 tristate "LSI/Engenio RDAC multipath support (EXPERIMENTAL)"
266 depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL
267 ---help---
268 Multipath support for LSI/Engenio RDAC.
269
270config DM_MULTIPATH_HP
271 tristate "HP MSA multipath support (EXPERIMENTAL)"
272 depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL
273 ---help---
274 Multipath support for HP MSA (Active/Passive) series hardware.
275
276config DM_DELAY 263config DM_DELAY
277 tristate "I/O delaying target (EXPERIMENTAL)" 264 tristate "I/O delaying target (EXPERIMENTAL)"
278 depends on BLK_DEV_DM && EXPERIMENTAL 265 depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 7be09eeea293..f1ef33dfd8cf 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -4,11 +4,9 @@
4 4
5dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ 5dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
6 dm-ioctl.o dm-io.o dm-kcopyd.o 6 dm-ioctl.o dm-io.o dm-kcopyd.o
7dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o 7dm-multipath-objs := dm-path-selector.o dm-mpath.o
8dm-snapshot-objs := dm-snap.o dm-exception-store.o 8dm-snapshot-objs := dm-snap.o dm-exception-store.o
9dm-mirror-objs := dm-raid1.o 9dm-mirror-objs := dm-raid1.o
10dm-rdac-objs := dm-mpath-rdac.o
11dm-hp-sw-objs := dm-mpath-hp-sw.o
12md-mod-objs := md.o bitmap.o 10md-mod-objs := md.o bitmap.o
13raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ 11raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
14 raid6int1.o raid6int2.o raid6int4.o \ 12 raid6int1.o raid6int2.o raid6int4.o \
@@ -35,9 +33,6 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
35obj-$(CONFIG_DM_CRYPT) += dm-crypt.o 33obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
36obj-$(CONFIG_DM_DELAY) += dm-delay.o 34obj-$(CONFIG_DM_DELAY) += dm-delay.o
37obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o 35obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
38obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o
39obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o
40obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o
41obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o 36obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
42obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o 37obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
43obj-$(CONFIG_DM_ZERO) += dm-zero.o 38obj-$(CONFIG_DM_ZERO) += dm-zero.o
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
deleted file mode 100644
index 3ea5ad4b7805..000000000000
--- a/drivers/md/dm-emc.c
+++ /dev/null
@@ -1,345 +0,0 @@
1/*
2 * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved.
3 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 *
7 * Multipath support for EMC CLARiiON AX/CX-series hardware.
8 */
9
10#include "dm.h"
11#include "dm-hw-handler.h"
12#include <scsi/scsi.h>
13#include <scsi/scsi_cmnd.h>
14
15#define DM_MSG_PREFIX "multipath emc"
16
17struct emc_handler {
18 spinlock_t lock;
19
20 /* Whether we should send the short trespass command (FC-series)
21 * or the long version (default for AX/CX CLARiiON arrays). */
22 unsigned short_trespass;
23 /* Whether or not to honor SCSI reservations when initiating a
24 * switch-over. Default: Don't. */
25 unsigned hr;
26
27 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
28};
29
30#define TRESPASS_PAGE 0x22
31#define EMC_FAILOVER_TIMEOUT (60 * HZ)
32
33/* Code borrowed from dm-lsi-rdac by Mike Christie */
34
35static inline void free_bio(struct bio *bio)
36{
37 __free_page(bio->bi_io_vec[0].bv_page);
38 bio_put(bio);
39}
40
41static void emc_endio(struct bio *bio, int error)
42{
43 struct dm_path *path = bio->bi_private;
44
45 /* We also need to look at the sense keys here whether or not to
46 * switch to the next PG etc.
47 *
48 * For now simple logic: either it works or it doesn't.
49 */
50 if (error)
51 dm_pg_init_complete(path, MP_FAIL_PATH);
52 else
53 dm_pg_init_complete(path, 0);
54
55 /* request is freed in block layer */
56 free_bio(bio);
57}
58
59static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
60{
61 struct bio *bio;
62 struct page *page;
63
64 bio = bio_alloc(GFP_ATOMIC, 1);
65 if (!bio) {
66 DMERR("get_failover_bio: bio_alloc() failed.");
67 return NULL;
68 }
69
70 bio->bi_rw |= (1 << BIO_RW);
71 bio->bi_bdev = path->dev->bdev;
72 bio->bi_sector = 0;
73 bio->bi_private = path;
74 bio->bi_end_io = emc_endio;
75
76 page = alloc_page(GFP_ATOMIC);
77 if (!page) {
78 DMERR("get_failover_bio: alloc_page() failed.");
79 bio_put(bio);
80 return NULL;
81 }
82
83 if (bio_add_page(bio, page, data_size, 0) != data_size) {
84 DMERR("get_failover_bio: bio_add_page() failed.");
85 __free_page(page);
86 bio_put(bio);
87 return NULL;
88 }
89
90 return bio;
91}
92
93static struct request *get_failover_req(struct emc_handler *h,
94 struct bio *bio, struct dm_path *path)
95{
96 struct request *rq;
97 struct block_device *bdev = bio->bi_bdev;
98 struct request_queue *q = bdev_get_queue(bdev);
99
100 /* FIXME: Figure out why it fails with GFP_ATOMIC. */
101 rq = blk_get_request(q, WRITE, __GFP_WAIT);
102 if (!rq) {
103 DMERR("get_failover_req: blk_get_request failed");
104 return NULL;
105 }
106
107 blk_rq_append_bio(q, rq, bio);
108
109 rq->sense = h->sense;
110 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
111 rq->sense_len = 0;
112
113 rq->timeout = EMC_FAILOVER_TIMEOUT;
114 rq->cmd_type = REQ_TYPE_BLOCK_PC;
115 rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
116
117 return rq;
118}
119
120static struct request *emc_trespass_get(struct emc_handler *h,
121 struct dm_path *path)
122{
123 struct bio *bio;
124 struct request *rq;
125 unsigned char *page22;
126 unsigned char long_trespass_pg[] = {
127 0, 0, 0, 0,
128 TRESPASS_PAGE, /* Page code */
129 0x09, /* Page length - 2 */
130 h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
131 0xff, 0xff, /* Trespass target */
132 0, 0, 0, 0, 0, 0 /* Reserved bytes / unknown */
133 };
134 unsigned char short_trespass_pg[] = {
135 0, 0, 0, 0,
136 TRESPASS_PAGE, /* Page code */
137 0x02, /* Page length - 2 */
138 h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
139 0xff, /* Trespass target */
140 };
141 unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) :
142 sizeof(long_trespass_pg);
143
144 /* get bio backing */
145 if (data_size > PAGE_SIZE)
146 /* this should never happen */
147 return NULL;
148
149 bio = get_failover_bio(path, data_size);
150 if (!bio) {
151 DMERR("emc_trespass_get: no bio");
152 return NULL;
153 }
154
155 page22 = (unsigned char *)bio_data(bio);
156 memset(page22, 0, data_size);
157
158 memcpy(page22, h->short_trespass ?
159 short_trespass_pg : long_trespass_pg, data_size);
160
161 /* get request for block layer packet command */
162 rq = get_failover_req(h, bio, path);
163 if (!rq) {
164 DMERR("emc_trespass_get: no rq");
165 free_bio(bio);
166 return NULL;
167 }
168
169 /* Prepare the command. */
170 rq->cmd[0] = MODE_SELECT;
171 rq->cmd[1] = 0x10;
172 rq->cmd[4] = data_size;
173 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
174
175 return rq;
176}
177
178static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed,
179 struct dm_path *path)
180{
181 struct request *rq;
182 struct request_queue *q = bdev_get_queue(path->dev->bdev);
183
184 /*
185 * We can either blindly init the pg (then look at the sense),
186 * or we can send some commands to get the state here (then
187 * possibly send the fo cmnd), or we can also have the
188 * initial state passed into us and then get an update here.
189 */
190 if (!q) {
191 DMINFO("emc_pg_init: no queue");
192 goto fail_path;
193 }
194
195 /* FIXME: The request should be pre-allocated. */
196 rq = emc_trespass_get(hwh->context, path);
197 if (!rq) {
198 DMERR("emc_pg_init: no rq");
199 goto fail_path;
200 }
201
202 DMINFO("emc_pg_init: sending switch-over command");
203 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
204 return;
205
206fail_path:
207 dm_pg_init_complete(path, MP_FAIL_PATH);
208}
209
210static struct emc_handler *alloc_emc_handler(void)
211{
212 struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
213
214 if (h)
215 spin_lock_init(&h->lock);
216
217 return h;
218}
219
220static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
221{
222 struct emc_handler *h;
223 unsigned hr, short_trespass;
224
225 if (argc == 0) {
226 /* No arguments: use defaults */
227 hr = 0;
228 short_trespass = 0;
229 } else if (argc != 2) {
230 DMWARN("incorrect number of arguments");
231 return -EINVAL;
232 } else {
233 if ((sscanf(argv[0], "%u", &short_trespass) != 1)
234 || (short_trespass > 1)) {
235 DMWARN("invalid trespass mode selected");
236 return -EINVAL;
237 }
238
239 if ((sscanf(argv[1], "%u", &hr) != 1)
240 || (hr > 1)) {
241 DMWARN("invalid honor reservation flag selected");
242 return -EINVAL;
243 }
244 }
245
246 h = alloc_emc_handler();
247 if (!h)
248 return -ENOMEM;
249
250 hwh->context = h;
251
252 if ((h->short_trespass = short_trespass))
253 DMWARN("short trespass command will be send");
254 else
255 DMWARN("long trespass command will be send");
256
257 if ((h->hr = hr))
258 DMWARN("honor reservation bit will be set");
259 else
260 DMWARN("honor reservation bit will not be set (default)");
261
262 return 0;
263}
264
265static void emc_destroy(struct hw_handler *hwh)
266{
267 struct emc_handler *h = (struct emc_handler *) hwh->context;
268
269 kfree(h);
270 hwh->context = NULL;
271}
272
/*
 * error hook: decide how dm-mpath should react to a failed bio.
 *
 * No EMC-specific decoding is done; the decision is delegated to
 * dm_scsi_err_handler().
 *
 * FIXME: Patch from axboe still missing.  Once sense data is available
 * on the bio, the intended EMC-specific handling of
 * (sense key / asc / ascq) is:
 *
 *   0x020403  LUN Not Ready - Manual Intervention Required: this is a
 *             passive path, so bypass the PG and retry the I/O.
 *             However, if EVPD C0 indicates that this is due to an NDU
 *             in progress, the path should be failed too, which might
 *             require a SCSI inquiry in the end_io path.
 *   0x052501  An array based copy is in progress: do not fail the
 *             path, do not bypass to another PG, do not retry - fail
 *             the I/O immediately (return 0).
 *   0x062900  Unit Attention: first I/O to the new path, just retry.
 */
static unsigned emc_error(struct hw_handler *hwh, struct bio *bio)
{
	/* Try default handler */
	return dm_scsi_err_handler(hwh, bio);
}
310
311static struct hw_handler_type emc_hwh = {
312 .name = "emc",
313 .module = THIS_MODULE,
314 .create = emc_create,
315 .destroy = emc_destroy,
316 .pg_init = emc_pg_init,
317 .error = emc_error,
318};
319
320static int __init dm_emc_init(void)
321{
322 int r = dm_register_hw_handler(&emc_hwh);
323
324 if (r < 0)
325 DMERR("register failed %d", r);
326
327 DMINFO("version 0.0.3 loaded");
328
329 return r;
330}
331
332static void __exit dm_emc_exit(void)
333{
334 int r = dm_unregister_hw_handler(&emc_hwh);
335
336 if (r < 0)
337 DMERR("unregister failed %d", r);
338}
339
340module_init(dm_emc_init);
341module_exit(dm_emc_exit);
342
343MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath");
344MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>");
345MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
deleted file mode 100644
index 2ee84d8aa0bf..000000000000
--- a/drivers/md/dm-hw-handler.c
+++ /dev/null
@@ -1,213 +0,0 @@
1/*
2 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
3 *
4 * This file is released under the GPL.
5 *
6 * Multipath hardware handler registration.
7 */
8
9#include "dm.h"
10#include "dm-hw-handler.h"
11
12#include <linux/slab.h>
13
14struct hwh_internal {
15 struct hw_handler_type hwht;
16
17 struct list_head list;
18 long use;
19};
20
21#define hwht_to_hwhi(__hwht) container_of((__hwht), struct hwh_internal, hwht)
22
23static LIST_HEAD(_hw_handlers);
24static DECLARE_RWSEM(_hwh_lock);
25
26static struct hwh_internal *__find_hw_handler_type(const char *name)
27{
28 struct hwh_internal *hwhi;
29
30 list_for_each_entry(hwhi, &_hw_handlers, list) {
31 if (!strcmp(name, hwhi->hwht.name))
32 return hwhi;
33 }
34
35 return NULL;
36}
37
38static struct hwh_internal *get_hw_handler(const char *name)
39{
40 struct hwh_internal *hwhi;
41
42 down_read(&_hwh_lock);
43 hwhi = __find_hw_handler_type(name);
44 if (hwhi) {
45 if ((hwhi->use == 0) && !try_module_get(hwhi->hwht.module))
46 hwhi = NULL;
47 else
48 hwhi->use++;
49 }
50 up_read(&_hwh_lock);
51
52 return hwhi;
53}
54
55struct hw_handler_type *dm_get_hw_handler(const char *name)
56{
57 struct hwh_internal *hwhi;
58
59 if (!name)
60 return NULL;
61
62 hwhi = get_hw_handler(name);
63 if (!hwhi) {
64 request_module("dm-%s", name);
65 hwhi = get_hw_handler(name);
66 }
67
68 return hwhi ? &hwhi->hwht : NULL;
69}
70
71void dm_put_hw_handler(struct hw_handler_type *hwht)
72{
73 struct hwh_internal *hwhi;
74
75 if (!hwht)
76 return;
77
78 down_read(&_hwh_lock);
79 hwhi = __find_hw_handler_type(hwht->name);
80 if (!hwhi)
81 goto out;
82
83 if (--hwhi->use == 0)
84 module_put(hwhi->hwht.module);
85
86 BUG_ON(hwhi->use < 0);
87
88 out:
89 up_read(&_hwh_lock);
90}
91
92static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
93{
94 struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
95
96 if (hwhi)
97 hwhi->hwht = *hwht;
98
99 return hwhi;
100}
101
102int dm_register_hw_handler(struct hw_handler_type *hwht)
103{
104 int r = 0;
105 struct hwh_internal *hwhi = _alloc_hw_handler(hwht);
106
107 if (!hwhi)
108 return -ENOMEM;
109
110 down_write(&_hwh_lock);
111
112 if (__find_hw_handler_type(hwht->name)) {
113 kfree(hwhi);
114 r = -EEXIST;
115 } else
116 list_add(&hwhi->list, &_hw_handlers);
117
118 up_write(&_hwh_lock);
119
120 return r;
121}
122
123int dm_unregister_hw_handler(struct hw_handler_type *hwht)
124{
125 struct hwh_internal *hwhi;
126
127 down_write(&_hwh_lock);
128
129 hwhi = __find_hw_handler_type(hwht->name);
130 if (!hwhi) {
131 up_write(&_hwh_lock);
132 return -EINVAL;
133 }
134
135 if (hwhi->use) {
136 up_write(&_hwh_lock);
137 return -ETXTBSY;
138 }
139
140 list_del(&hwhi->list);
141
142 up_write(&_hwh_lock);
143
144 kfree(hwhi);
145
146 return 0;
147}
148
149unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio)
150{
151#if 0
152 int sense_key, asc, ascq;
153
154 if (bio->bi_error & BIO_SENSE) {
155 /* FIXME: This is just an initial guess. */
156 /* key / asc / ascq */
157 sense_key = (bio->bi_error >> 16) & 0xff;
158 asc = (bio->bi_error >> 8) & 0xff;
159 ascq = bio->bi_error & 0xff;
160
161 switch (sense_key) {
162 /* This block as a whole comes from the device.
163 * So no point retrying on another path. */
164 case 0x03: /* Medium error */
165 case 0x05: /* Illegal request */
166 case 0x07: /* Data protect */
167 case 0x08: /* Blank check */
168 case 0x0a: /* copy aborted */
169 case 0x0c: /* obsolete - no clue ;-) */
170 case 0x0d: /* volume overflow */
171 case 0x0e: /* data miscompare */
172 case 0x0f: /* reserved - no idea either. */
173 return MP_ERROR_IO;
174
175 /* For these errors it's unclear whether they
176 * come from the device or the controller.
177 * So just lets try a different path, and if
178 * it eventually succeeds, user-space will clear
179 * the paths again... */
180 case 0x02: /* Not ready */
181 case 0x04: /* Hardware error */
182 case 0x09: /* vendor specific */
183 case 0x0b: /* Aborted command */
184 return MP_FAIL_PATH;
185
186 case 0x06: /* Unit attention - might want to decode */
187 if (asc == 0x04 && ascq == 0x01)
188 /* "Unit in the process of
189 * becoming ready" */
190 return 0;
191 return MP_FAIL_PATH;
192
193 /* FIXME: For Unit Not Ready we may want
194 * to have a generic pg activation
195 * feature (START_UNIT). */
196
197 /* Should these two ever end up in the
198 * error path? I don't think so. */
199 case 0x00: /* No sense */
200 case 0x01: /* Recovered error */
201 return 0;
202 }
203 }
204#endif
205
206 /* We got no idea how to decode the other kinds of errors ->
207 * assume generic error condition. */
208 return MP_FAIL_PATH;
209}
210
211EXPORT_SYMBOL_GPL(dm_register_hw_handler);
212EXPORT_SYMBOL_GPL(dm_unregister_hw_handler);
213EXPORT_SYMBOL_GPL(dm_scsi_err_handler);
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
deleted file mode 100644
index 46809dcb121a..000000000000
--- a/drivers/md/dm-hw-handler.h
+++ /dev/null
@@ -1,63 +0,0 @@
1/*
2 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
3 *
4 * This file is released under the GPL.
5 *
6 * Multipath hardware handler registration.
7 */
8
9#ifndef DM_HW_HANDLER_H
10#define DM_HW_HANDLER_H
11
12#include <linux/device-mapper.h>
13
14#include "dm-mpath.h"
15
16struct hw_handler_type;
/* One instance of a hardware handler. */
struct hw_handler {
	/* Type whose hooks operate on this instance. */
	struct hw_handler_type *type;

	struct mapped_device *md;

	/* Private data of the handler type; its ->create hook sets it. */
	void *context;
};
22
23/*
24 * Constructs a hardware handler object, takes custom arguments
25 */
26/* Information about a hardware handler type */
27struct hw_handler_type {
28 char *name;
29 struct module *module;
30
31 int (*create) (struct hw_handler *handler, unsigned int argc,
32 char **argv);
33 void (*destroy) (struct hw_handler *hwh);
34
35 void (*pg_init) (struct hw_handler *hwh, unsigned bypassed,
36 struct dm_path *path);
37 unsigned (*error) (struct hw_handler *hwh, struct bio *bio);
38 int (*status) (struct hw_handler *hwh, status_type_t type,
39 char *result, unsigned int maxlen);
40};
41
42/* Register a hardware handler */
43int dm_register_hw_handler(struct hw_handler_type *type);
44
45/* Unregister a hardware handler */
46int dm_unregister_hw_handler(struct hw_handler_type *type);
47
48/* Returns a registered hardware handler type */
49struct hw_handler_type *dm_get_hw_handler(const char *name);
50
51/* Releases a hardware handler */
52void dm_put_hw_handler(struct hw_handler_type *hwht);
53
54/* Default err function */
55unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
56
57/* Error flags for err and dm_pg_init_complete */
58#define MP_FAIL_PATH 1
59#define MP_BYPASS_PG 2
60#define MP_ERROR_IO 4 /* Don't retry this I/O */
61#define MP_RETRY 8
62
63#endif
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
deleted file mode 100644
index b63a0ab37c53..000000000000
--- a/drivers/md/dm-mpath-hp-sw.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 * Copyright (C) 2005 Mike Christie, All rights reserved.
3 * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
4 * Authors: Mike Christie
5 * Dave Wysochanski
6 *
7 * This file is released under the GPL.
8 *
9 * This module implements the specific path activation code for
10 * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
11 * storage arrays.
12 * These storage arrays have controller-based failover, not
13 * LUN-based failover. However, LUN-based failover is the design
14 * of dm-multipath. Thus, this module is written for LUN-based failover.
15 */
16#include <linux/blkdev.h>
17#include <linux/list.h>
18#include <linux/types.h>
19#include <scsi/scsi.h>
20#include <scsi/scsi_cmnd.h>
21#include <scsi/scsi_dbg.h>
22
23#include "dm.h"
24#include "dm-hw-handler.h"
25
26#define DM_MSG_PREFIX "multipath hp-sw"
27#define DM_HP_HWH_NAME "hp-sw"
28#define DM_HP_HWH_VER "1.0.0"
29
30struct hp_sw_context {
31 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
32};
33
34/*
35 * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
36 * @req: path activation request
37 *
38 * Examine error codes of request and determine whether the error is retryable.
39 * Some error codes are already retried by scsi-ml (see
40 * scsi_decide_disposition), but some HP specific codes are not.
41 * The intent of this routine is to supply the logic for the HP specific
42 * check conditions.
43 *
44 * Returns:
45 * 1 - command completed with retryable error
46 * 0 - command completed with non-retryable error
47 *
48 * Possible optimizations
49 * 1. More hardware-specific error codes
50 */
51static int hp_sw_error_is_retryable(struct request *req)
52{
53 /*
54 * NOT_READY is known to be retryable
55 * For now we just dump out the sense data and call it retryable
56 */
57 if (status_byte(req->errors) == CHECK_CONDITION)
58 __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
59
60 /*
61 * At this point we don't have complete information about all the error
62 * codes from this hardware, so we are just conservative and retry
63 * when in doubt.
64 */
65 return 1;
66}
67
68/*
69 * hp_sw_end_io - Completion handler for HP path activation.
70 * @req: path activation request
71 * @error: scsi-ml error
72 *
73 * Check sense data, free request structure, and notify dm that
74 * pg initialization has completed.
75 *
76 * Context: scsi-ml softirq
77 *
78 */
79static void hp_sw_end_io(struct request *req, int error)
80{
81 struct dm_path *path = req->end_io_data;
82 unsigned err_flags = 0;
83
84 if (!error) {
85 DMDEBUG("%s path activation command - success",
86 path->dev->name);
87 goto out;
88 }
89
90 if (hp_sw_error_is_retryable(req)) {
91 DMDEBUG("%s path activation command - retry",
92 path->dev->name);
93 err_flags = MP_RETRY;
94 goto out;
95 }
96
97 DMWARN("%s path activation fail - error=0x%x",
98 path->dev->name, error);
99 err_flags = MP_FAIL_PATH;
100
101out:
102 req->end_io_data = NULL;
103 __blk_put_request(req->q, req);
104 dm_pg_init_complete(path, err_flags);
105}
106
107/*
108 * hp_sw_get_request - Allocate an HP specific path activation request
109 * @path: path on which request will be sent (needed for request queue)
110 *
111 * The START command is used for path activation request.
112 * These arrays are controller-based failover, not LUN based.
113 * One START command issued to a single path will fail over all
114 * LUNs for the same controller.
115 *
116 * Possible optimizations
117 * 1. Make timeout configurable
118 * 2. Preallocate request
119 */
120static struct request *hp_sw_get_request(struct dm_path *path)
121{
122 struct request *req;
123 struct block_device *bdev = path->dev->bdev;
124 struct request_queue *q = bdev_get_queue(bdev);
125 struct hp_sw_context *h = path->hwhcontext;
126
127 req = blk_get_request(q, WRITE, GFP_NOIO);
128 if (!req)
129 goto out;
130
131 req->timeout = 60 * HZ;
132
133 req->errors = 0;
134 req->cmd_type = REQ_TYPE_BLOCK_PC;
135 req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
136 req->end_io_data = path;
137 req->sense = h->sense;
138 memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
139
140 req->cmd[0] = START_STOP;
141 req->cmd[4] = 1;
142 req->cmd_len = COMMAND_SIZE(req->cmd[0]);
143
144out:
145 return req;
146}
147
148/*
149 * hp_sw_pg_init - HP path activation implementation.
150 * @hwh: hardware handler specific data
151 * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
152 * @path: path to send initialization command
153 *
154 * Send an HP-specific path activation command on 'path'.
155 * Do not try to optimize in any way, just send the activation command.
156 * More than one path activation command may be sent to the same controller.
157 * This seems to work fine for basic failover support.
158 *
159 * Possible optimizations
160 * 1. Detect an in-progress activation request and avoid submitting another one
161 * 2. Model the controller and only send a single activation request at a time
162 * 3. Determine the state of a path before sending an activation request
163 *
164 * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
165 */
166static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
167 struct dm_path *path)
168{
169 struct request *req;
170 struct hp_sw_context *h;
171
172 path->hwhcontext = hwh->context;
173 h = hwh->context;
174
175 req = hp_sw_get_request(path);
176 if (!req) {
177 DMERR("%s path activation command - allocation fail",
178 path->dev->name);
179 goto retry;
180 }
181
182 DMDEBUG("%s path activation command - sent", path->dev->name);
183
184 blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
185 return;
186
187retry:
188 dm_pg_init_complete(path, MP_RETRY);
189}
190
191static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
192{
193 struct hp_sw_context *h;
194
195 h = kmalloc(sizeof(*h), GFP_KERNEL);
196 if (!h)
197 return -ENOMEM;
198
199 hwh->context = h;
200
201 return 0;
202}
203
204static void hp_sw_destroy(struct hw_handler *hwh)
205{
206 struct hp_sw_context *h = hwh->context;
207
208 kfree(h);
209}
210
211static struct hw_handler_type hp_sw_hwh = {
212 .name = DM_HP_HWH_NAME,
213 .module = THIS_MODULE,
214 .create = hp_sw_create,
215 .destroy = hp_sw_destroy,
216 .pg_init = hp_sw_pg_init,
217};
218
219static int __init hp_sw_init(void)
220{
221 int r;
222
223 r = dm_register_hw_handler(&hp_sw_hwh);
224 if (r < 0)
225 DMERR("register failed %d", r);
226 else
227 DMINFO("version " DM_HP_HWH_VER " loaded");
228
229 return r;
230}
231
232static void __exit hp_sw_exit(void)
233{
234 int r;
235
236 r = dm_unregister_hw_handler(&hp_sw_hwh);
237 if (r < 0)
238 DMERR("unregister failed %d", r);
239}
240
241module_init(hp_sw_init);
242module_exit(hp_sw_exit);
243
244MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
245MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
246MODULE_LICENSE("GPL");
247MODULE_VERSION(DM_HP_HWH_VER);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
deleted file mode 100644
index 95e77734880a..000000000000
--- a/drivers/md/dm-mpath-rdac.c
+++ /dev/null
@@ -1,700 +0,0 @@
1/*
2 * Engenio/LSI RDAC DM HW handler
3 *
4 * Copyright (C) 2005 Mike Christie. All rights reserved.
5 * Copyright (C) Chandra Seetharaman, IBM Corp. 2007
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 */
22#include <scsi/scsi.h>
23#include <scsi/scsi_cmnd.h>
24#include <scsi/scsi_eh.h>
25
26#define DM_MSG_PREFIX "multipath rdac"
27
28#include "dm.h"
29#include "dm-hw-handler.h"
30
31#define RDAC_DM_HWH_NAME "rdac"
32#define RDAC_DM_HWH_VER "0.4"
33
34/*
35 * LSI mode page stuff
36 *
37 * These struct definitions and the forming of the
38 * mode page were taken from the LSI RDAC 2.4 GPL'd
39 * driver, and then converted to Linux conventions.
40 */
/* Value written to rdac_mode_common.quiescence_timeout; no trailing ';' so it is usable in expressions. */
#define RDAC_QUIESCENCE_TIME 20
42/*
43 * Page Codes
44 */
45#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c
46
47/*
48 * Controller modes definitions
49 */
50#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01
51#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02
52
53/*
54 * RDAC Options field
55 */
56#define RDAC_FORCED_QUIESENCE 0x02
57
58#define RDAC_FAILOVER_TIMEOUT (60 * HZ)
59
/*
 * Header at the start of struct rdac_pg_legacy, the buffer sent with the
 * 6-byte MODE SELECT command.  rdac_failover_get() leaves every field zero.
 */
60struct rdac_mode_6_hdr {
61 u8 data_len;
62 u8 medium_type;
63 u8 device_params;
64 u8 block_desc_len;
65};
66
/*
 * Header at the start of struct rdac_pg_expanded, the buffer sent with
 * MODE_SELECT_10.  rdac_failover_get() leaves every field zero.
 */
67struct rdac_mode_10_hdr {
68 u16 data_len;
69 u8 medium_type;
70 u8 device_params;
71 u16 reserved;
72 u16 block_desc_len;
73};
74
/*
 * Fields shared by the legacy and expanded pages.  rdac_failover_get() sets
 * rdac_mode[1], quiescence_timeout and rdac_options; the rest stay zero.
 */
75struct rdac_mode_common {
76 u8 controller_serial[16];
77 u8 alt_controller_serial[16];
78 u8 rdac_mode[2];
79 u8 alt_rdac_mode[2];
80 u8 quiescence_timeout;
81 u8 rdac_options;
82};
83
/*
 * Page buffer used when the controller's use_10_ms is 0 (sent with the
 * 6-byte MODE SELECT).  lun_table is indexed by the handler's LUN and has
 * MODE6_MAX_LUN entries.
 */
84struct rdac_pg_legacy {
85 struct rdac_mode_6_hdr hdr;
86 u8 page_code;
87 u8 page_len;
88 struct rdac_mode_common common;
89#define MODE6_MAX_LUN 32
90 u8 lun_table[MODE6_MAX_LUN];
91 u8 reserved2[32];
92 u8 reserved3;
93 u8 reserved4;
94};
95
/*
 * Page buffer used when the controller's use_10_ms is non-zero (sent with
 * MODE_SELECT_10).  lun_table is indexed by the handler's LUN and has 256
 * entries.
 */
96struct rdac_pg_expanded {
97 struct rdac_mode_10_hdr hdr;
98 u8 page_code;
99 u8 subpage_code;
100 u8 page_len[2];
101 struct rdac_mode_common common;
102 u8 lun_table[256];
103 u8 reserved3;
104 u8 reserved4;
105};
106
/*
 * Receive buffer for INQUIRY page 0xC9.  c9_endio() only looks at avte_cvp:
 * bit 7 set is treated as AVT mode, bit 0 set as "the controller on this
 * path owns the LUN".
 */
107struct c9_inquiry {
108 u8 peripheral_info;
109 u8 page_code; /* 0xC9 */
110 u8 reserved1;
111 u8 page_len;
112 u8 page_id[4]; /* "vace" */
113 u8 avte_cvp;
114 u8 path_prio;
115 u8 reserved2[38];
116};
117
118#define SUBSYS_ID_LEN 16
119#define SLOT_ID_LEN 2
120
/*
 * Receive buffer for INQUIRY page 0xC4.  c4_endio() passes subsys_id and
 * slot_id to get_controller() to find or create the rdac_controller.
 */
121struct c4_inquiry {
122 u8 peripheral_info;
123 u8 page_code; /* 0xC4 */
124 u8 reserved1;
125 u8 page_len;
126 u8 page_id[4]; /* "subs" */
127 u8 subsys_id[SUBSYS_ID_LEN];
128 u8 revision[4];
129 u8 slot_id[SLOT_ID_LEN];
130 u8 reserved[2];
131};
132
/*
 * One entry per (subsys_id, slot_id) pair, shared by the handlers that
 * reported that pair and reference-counted through kref.
 */
133struct rdac_controller {
134 u8 subsys_id[SUBSYS_ID_LEN];
135 u8 slot_id[SLOT_ID_LEN];
136 int use_10_ms; /* -1 until c2_endio() decides; then 0 (MODE SELECT 6) or 1 (MODE SELECT 10) */
137 struct kref kref;
138 struct list_head node; /* list of all controllers */
139 spinlock_t lock; /* taken around submitted and cmd_list updates */
140 int submitted; /* set once a MODE SELECT has been issued, cleared by rdac_resubmit_all() */
141 struct list_head cmd_list; /* list of commands to be submitted */
142 union {
143 struct rdac_pg_legacy legacy;
144 struct rdac_pg_expanded expanded;
145 } mode_select; /* page buffer filled by rdac_failover_get() */
146};
/*
 * Receive buffer for INQUIRY page 0xC8.  c8_endio() takes the handler's LUN
 * from lun[7]; the other fields are not read by this file.
 */
147struct c8_inquiry {
148 u8 peripheral_info;
149 u8 page_code; /* 0xC8 */
150 u8 reserved1;
151 u8 page_len;
152 u8 page_id[4]; /* "edid" */
153 u8 reserved2[3];
154 u8 vol_uniq_id_len;
155 u8 vol_uniq_id[16];
156 u8 vol_user_label_len;
157 u8 vol_user_label[60];
158 u8 array_uniq_id_len;
159 u8 array_unique_id[16];
160 u8 array_user_label_len;
161 u8 array_user_label[60];
162 u8 lun[8];
163};
164
/*
 * Receive buffer for INQUIRY page 0xC2.  c2_endio() compares
 * max_lun_supported with MODE6_MAX_LUN to choose the MODE SELECT variant.
 */
165struct c2_inquiry {
166 u8 peripheral_info;
167 u8 page_code; /* 0xC2 */
168 u8 reserved1;
169 u8 page_len;
170 u8 page_id[4]; /* "swr4" */
171 u8 sw_version[3];
172 u8 sw_date[3];
173 u8 features_enabled;
174 u8 max_lun_supported;
175 u8 partitions[239]; /* Total allocation length should be 0xFF */
176};
177
/*
 * Per-handler state, allocated by rdac_create() and stored in hwh->context.
 * cmd_to_send selects what service_wkq() submits next; inq is the receive
 * buffer shared by all four INQUIRY pages.
 */
178struct rdac_handler {
179 struct list_head entry; /* list waiting to submit MODE SELECT */
180 unsigned timeout; /* copied to rq->timeout by get_rdac_req() */
181 struct rdac_controller *ctlr; /* NULL until c4_endio() resolves it */
182#define UNINITIALIZED_LUN (1 << 8)
183 unsigned lun; /* UNINITIALIZED_LUN until c8_endio() stores lun[7] */
184 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
185 struct dm_path *path; /* set by rdac_pg_init() */
186 struct work_struct work;
187#define SEND_C2_INQUIRY 1
188#define SEND_C4_INQUIRY 2
189#define SEND_C8_INQUIRY 3
190#define SEND_C9_INQUIRY 4
191#define SEND_MODE_SELECT 5
192 int cmd_to_send;
193 union {
194 struct c2_inquiry c2;
195 struct c4_inquiry c4;
196 struct c8_inquiry c8;
197 struct c9_inquiry c9;
198 } inq;
199};
200
/*
 * ctlr_list holds every rdac_controller and is guarded by list_lock.
 * rdac_wkqd is the workqueue that runs service_wkq(); it is created in
 * rdac_init() and destroyed in rdac_exit().
 */
201static LIST_HEAD(ctlr_list);
202static DEFINE_SPINLOCK(list_lock);
203static struct workqueue_struct *rdac_wkqd;
204
205static inline int had_failures(struct request *req, int error)
206{
207 return (error || host_byte(req->errors) != DID_OK ||
208 msg_byte(req->errors) != COMMAND_COMPLETE);
209}
210
211static void rdac_resubmit_all(struct rdac_handler *h)
212{
213 struct rdac_controller *ctlr = h->ctlr;
214 struct rdac_handler *tmp, *h1;
215
216 spin_lock(&ctlr->lock);
217 list_for_each_entry_safe(h1, tmp, &ctlr->cmd_list, entry) {
218 h1->cmd_to_send = SEND_C9_INQUIRY;
219 queue_work(rdac_wkqd, &h1->work);
220 list_del(&h1->entry);
221 }
222 ctlr->submitted = 0;
223 spin_unlock(&ctlr->lock);
224}
225
226static void mode_select_endio(struct request *req, int error)
227{
228 struct rdac_handler *h = req->end_io_data;
229 struct scsi_sense_hdr sense_hdr;
230 int sense = 0, fail = 0;
231
232 if (had_failures(req, error)) {
233 fail = 1;
234 goto failed;
235 }
236
237 if (status_byte(req->errors) == CHECK_CONDITION) {
238 scsi_normalize_sense(req->sense, SCSI_SENSE_BUFFERSIZE,
239 &sense_hdr);
240 sense = (sense_hdr.sense_key << 16) | (sense_hdr.asc << 8) |
241 sense_hdr.ascq;
242 /* If it is retryable failure, submit the c9 inquiry again */
243 if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02 ||
244 sense == 0x62900) {
245 /* 0x59136 - Command lock contention
246 * 0x[6b]8b02 - Quiesense in progress or achieved
247 * 0x62900 - Power On, Reset, or Bus Device Reset
248 */
249 h->cmd_to_send = SEND_C9_INQUIRY;
250 queue_work(rdac_wkqd, &h->work);
251 goto done;
252 }
253 if (sense)
254 DMINFO("MODE_SELECT failed on %s with sense 0x%x",
255 h->path->dev->name, sense);
256 }
257failed:
258 if (fail || sense)
259 dm_pg_init_complete(h->path, MP_FAIL_PATH);
260 else
261 dm_pg_init_complete(h->path, 0);
262
263done:
264 rdac_resubmit_all(h);
265 __blk_put_request(req->q, req);
266}
267
268static struct request *get_rdac_req(struct rdac_handler *h,
269 void *buffer, unsigned buflen, int rw)
270{
271 struct request *rq;
272 struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
273
274 rq = blk_get_request(q, rw, GFP_KERNEL);
275
276 if (!rq) {
277 DMINFO("get_rdac_req: blk_get_request failed");
278 return NULL;
279 }
280
281 if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_KERNEL)) {
282 blk_put_request(rq);
283 DMINFO("get_rdac_req: blk_rq_map_kern failed");
284 return NULL;
285 }
286
287 rq->sense = h->sense;
288 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
289 rq->sense_len = 0;
290
291 rq->end_io_data = h;
292 rq->timeout = h->timeout;
293 rq->cmd_type = REQ_TYPE_BLOCK_PC;
294 rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
295 return rq;
296}
297
298static struct request *rdac_failover_get(struct rdac_handler *h)
299{
300 struct request *rq;
301 struct rdac_mode_common *common;
302 unsigned data_size;
303
304 if (h->ctlr->use_10_ms) {
305 struct rdac_pg_expanded *rdac_pg;
306
307 data_size = sizeof(struct rdac_pg_expanded);
308 rdac_pg = &h->ctlr->mode_select.expanded;
309 memset(rdac_pg, 0, data_size);
310 common = &rdac_pg->common;
311 rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
312 rdac_pg->subpage_code = 0x1;
313 rdac_pg->page_len[0] = 0x01;
314 rdac_pg->page_len[1] = 0x28;
315 rdac_pg->lun_table[h->lun] = 0x81;
316 } else {
317 struct rdac_pg_legacy *rdac_pg;
318
319 data_size = sizeof(struct rdac_pg_legacy);
320 rdac_pg = &h->ctlr->mode_select.legacy;
321 memset(rdac_pg, 0, data_size);
322 common = &rdac_pg->common;
323 rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
324 rdac_pg->page_len = 0x68;
325 rdac_pg->lun_table[h->lun] = 0x81;
326 }
327 common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS;
328 common->quiescence_timeout = RDAC_QUIESCENCE_TIME;
329 common->rdac_options = RDAC_FORCED_QUIESENCE;
330
331 /* get request for block layer packet command */
332 rq = get_rdac_req(h, &h->ctlr->mode_select, data_size, WRITE);
333 if (!rq) {
334 DMERR("rdac_failover_get: no rq");
335 return NULL;
336 }
337
338 /* Prepare the command. */
339 if (h->ctlr->use_10_ms) {
340 rq->cmd[0] = MODE_SELECT_10;
341 rq->cmd[7] = data_size >> 8;
342 rq->cmd[8] = data_size & 0xff;
343 } else {
344 rq->cmd[0] = MODE_SELECT;
345 rq->cmd[4] = data_size;
346 }
347 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
348
349 return rq;
350}
351
352/* Acquires h->ctlr->lock */
353static void submit_mode_select(struct rdac_handler *h)
354{
355 struct request *rq;
356 struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
357
358 spin_lock(&h->ctlr->lock);
359 if (h->ctlr->submitted) {
360 list_add(&h->entry, &h->ctlr->cmd_list);
361 goto drop_lock;
362 }
363
364 if (!q) {
365 DMINFO("submit_mode_select: no queue");
366 goto fail_path;
367 }
368
369 rq = rdac_failover_get(h);
370 if (!rq) {
371 DMERR("submit_mode_select: no rq");
372 goto fail_path;
373 }
374
375 DMINFO("queueing MODE_SELECT command on %s", h->path->dev->name);
376
377 blk_execute_rq_nowait(q, NULL, rq, 1, mode_select_endio);
378 h->ctlr->submitted = 1;
379 goto drop_lock;
380fail_path:
381 dm_pg_init_complete(h->path, MP_FAIL_PATH);
382drop_lock:
383 spin_unlock(&h->ctlr->lock);
384}
385
386static void release_ctlr(struct kref *kref)
387{
388 struct rdac_controller *ctlr;
389 ctlr = container_of(kref, struct rdac_controller, kref);
390
391 spin_lock(&list_lock);
392 list_del(&ctlr->node);
393 spin_unlock(&list_lock);
394 kfree(ctlr);
395}
396
397static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id)
398{
399 struct rdac_controller *ctlr, *tmp;
400
401 spin_lock(&list_lock);
402
403 list_for_each_entry(tmp, &ctlr_list, node) {
404 if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) &&
405 (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) {
406 kref_get(&tmp->kref);
407 spin_unlock(&list_lock);
408 return tmp;
409 }
410 }
411 ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC);
412 if (!ctlr)
413 goto done;
414
415 /* initialize fields of controller */
416 memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN);
417 memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN);
418 kref_init(&ctlr->kref);
419 spin_lock_init(&ctlr->lock);
420 ctlr->submitted = 0;
421 ctlr->use_10_ms = -1;
422 INIT_LIST_HEAD(&ctlr->cmd_list);
423 list_add(&ctlr->node, &ctlr_list);
424done:
425 spin_unlock(&list_lock);
426 return ctlr;
427}
428
429static void c4_endio(struct request *req, int error)
430{
431 struct rdac_handler *h = req->end_io_data;
432 struct c4_inquiry *sp;
433
434 if (had_failures(req, error)) {
435 dm_pg_init_complete(h->path, MP_FAIL_PATH);
436 goto done;
437 }
438
439 sp = &h->inq.c4;
440
441 h->ctlr = get_controller(sp->subsys_id, sp->slot_id);
442
443 if (h->ctlr) {
444 h->cmd_to_send = SEND_C9_INQUIRY;
445 queue_work(rdac_wkqd, &h->work);
446 } else
447 dm_pg_init_complete(h->path, MP_FAIL_PATH);
448done:
449 __blk_put_request(req->q, req);
450}
451
452static void c2_endio(struct request *req, int error)
453{
454 struct rdac_handler *h = req->end_io_data;
455 struct c2_inquiry *sp;
456
457 if (had_failures(req, error)) {
458 dm_pg_init_complete(h->path, MP_FAIL_PATH);
459 goto done;
460 }
461
462 sp = &h->inq.c2;
463
464 /* If more than MODE6_MAX_LUN luns are supported, use mode select 10 */
465 if (sp->max_lun_supported >= MODE6_MAX_LUN)
466 h->ctlr->use_10_ms = 1;
467 else
468 h->ctlr->use_10_ms = 0;
469
470 h->cmd_to_send = SEND_MODE_SELECT;
471 queue_work(rdac_wkqd, &h->work);
472done:
473 __blk_put_request(req->q, req);
474}
475
476static void c9_endio(struct request *req, int error)
477{
478 struct rdac_handler *h = req->end_io_data;
479 struct c9_inquiry *sp;
480
481 if (had_failures(req, error)) {
482 dm_pg_init_complete(h->path, MP_FAIL_PATH);
483 goto done;
484 }
485
486 /* We need to look at the sense keys here to take clear action.
487 * For now simple logic: If the host is in AVT mode or if controller
488 * owns the lun, return dm_pg_init_complete(), otherwise submit
489 * MODE SELECT.
490 */
491 sp = &h->inq.c9;
492
493 /* If in AVT mode, return success */
494 if ((sp->avte_cvp >> 7) == 0x1) {
495 dm_pg_init_complete(h->path, 0);
496 goto done;
497 }
498
499 /* If the controller on this path owns the LUN, return success */
500 if (sp->avte_cvp & 0x1) {
501 dm_pg_init_complete(h->path, 0);
502 goto done;
503 }
504
505 if (h->ctlr) {
506 if (h->ctlr->use_10_ms == -1)
507 h->cmd_to_send = SEND_C2_INQUIRY;
508 else
509 h->cmd_to_send = SEND_MODE_SELECT;
510 } else
511 h->cmd_to_send = SEND_C4_INQUIRY;
512 queue_work(rdac_wkqd, &h->work);
513done:
514 __blk_put_request(req->q, req);
515}
516
517static void c8_endio(struct request *req, int error)
518{
519 struct rdac_handler *h = req->end_io_data;
520 struct c8_inquiry *sp;
521
522 if (had_failures(req, error)) {
523 dm_pg_init_complete(h->path, MP_FAIL_PATH);
524 goto done;
525 }
526
527 /* We need to look at the sense keys here to take clear action.
528 * For now simple logic: Get the lun from the inquiry page.
529 */
530 sp = &h->inq.c8;
531 h->lun = sp->lun[7]; /* currently it uses only one byte */
532 h->cmd_to_send = SEND_C9_INQUIRY;
533 queue_work(rdac_wkqd, &h->work);
534done:
535 __blk_put_request(req->q, req);
536}
537
538static void submit_inquiry(struct rdac_handler *h, int page_code,
539 unsigned int len, rq_end_io_fn endio)
540{
541 struct request *rq;
542 struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
543
544 if (!q)
545 goto fail_path;
546
547 rq = get_rdac_req(h, &h->inq, len, READ);
548 if (!rq)
549 goto fail_path;
550
551 /* Prepare the command. */
552 rq->cmd[0] = INQUIRY;
553 rq->cmd[1] = 1;
554 rq->cmd[2] = page_code;
555 rq->cmd[4] = len;
556 rq->cmd_len = COMMAND_SIZE(INQUIRY);
557 blk_execute_rq_nowait(q, NULL, rq, 1, endio);
558 return;
559
560fail_path:
561 dm_pg_init_complete(h->path, MP_FAIL_PATH);
562}
563
564static void service_wkq(struct work_struct *work)
565{
566 struct rdac_handler *h = container_of(work, struct rdac_handler, work);
567
568 switch (h->cmd_to_send) {
569 case SEND_C2_INQUIRY:
570 submit_inquiry(h, 0xC2, sizeof(struct c2_inquiry), c2_endio);
571 break;
572 case SEND_C4_INQUIRY:
573 submit_inquiry(h, 0xC4, sizeof(struct c4_inquiry), c4_endio);
574 break;
575 case SEND_C8_INQUIRY:
576 submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio);
577 break;
578 case SEND_C9_INQUIRY:
579 submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio);
580 break;
581 case SEND_MODE_SELECT:
582 submit_mode_select(h);
583 break;
584 default:
585 BUG();
586 }
587}
588/*
589 * only support subpage2c until we confirm that this is just a matter of
590 * of updating firmware or not, and RDAC (basic AVT works already) for now
591 * but we can add these in in when we get time and testers
592 */
593static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv)
594{
595 struct rdac_handler *h;
596 unsigned timeout;
597
598 if (argc == 0) {
599 /* No arguments: use defaults */
600 timeout = RDAC_FAILOVER_TIMEOUT;
601 } else if (argc != 1) {
602 DMWARN("incorrect number of arguments");
603 return -EINVAL;
604 } else {
605 if (sscanf(argv[1], "%u", &timeout) != 1) {
606 DMWARN("invalid timeout value");
607 return -EINVAL;
608 }
609 }
610
611 h = kzalloc(sizeof(*h), GFP_KERNEL);
612 if (!h)
613 return -ENOMEM;
614
615 hwh->context = h;
616 h->timeout = timeout;
617 h->lun = UNINITIALIZED_LUN;
618 INIT_WORK(&h->work, service_wkq);
619 DMWARN("using RDAC command with timeout %u", h->timeout);
620
621 return 0;
622}
623
624static void rdac_destroy(struct hw_handler *hwh)
625{
626 struct rdac_handler *h = hwh->context;
627
628 if (h->ctlr)
629 kref_put(&h->ctlr->kref, release_ctlr);
630 kfree(h);
631 hwh->context = NULL;
632}
633
/* rdac_error - I/O error hook; simply defers to the default SCSI handler. */
static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio)
{
	unsigned err_flags = dm_scsi_err_handler(hwh, bio);

	return err_flags;
}
639
640static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed,
641 struct dm_path *path)
642{
643 struct rdac_handler *h = hwh->context;
644
645 h->path = path;
646 switch (h->lun) {
647 case UNINITIALIZED_LUN:
648 submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio);
649 break;
650 default:
651 submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio);
652 }
653}
654
/*
 * Handler descriptor passed to dm_register_hw_handler() /
 * dm_unregister_hw_handler() by rdac_init() and rdac_exit().
 */
655static struct hw_handler_type rdac_handler = {
656 .name = RDAC_DM_HWH_NAME,
657 .module = THIS_MODULE,
658 .create = rdac_create,
659 .destroy = rdac_destroy,
660 .pg_init = rdac_pg_init,
661 .error = rdac_error,
662};
663
664static int __init rdac_init(void)
665{
666 int r;
667
668 rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
669 if (!rdac_wkqd) {
670 DMERR("Failed to create workqueue rdac_wkqd.");
671 return -ENOMEM;
672 }
673
674 r = dm_register_hw_handler(&rdac_handler);
675 if (r < 0) {
676 DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
677 destroy_workqueue(rdac_wkqd);
678 return r;
679 }
680
681 DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
682 return 0;
683}
684
685static void __exit rdac_exit(void)
686{
687 int r = dm_unregister_hw_handler(&rdac_handler);
688
689 destroy_workqueue(rdac_wkqd);
690 if (r < 0)
691 DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r);
692}
693
/* Module entry/exit hooks and metadata for the RDAC handler. */
694module_init(rdac_init);
695module_exit(rdac_exit);
696
697MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support");
698MODULE_AUTHOR("Mike Christie, Chandra Seetharaman");
699MODULE_LICENSE("GPL");
700MODULE_VERSION(RDAC_DM_HWH_VER);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e7ee59e655d5..9f7302d4878d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,7 +7,6 @@
7 7
8#include "dm.h" 8#include "dm.h"
9#include "dm-path-selector.h" 9#include "dm-path-selector.h"
10#include "dm-hw-handler.h"
11#include "dm-bio-list.h" 10#include "dm-bio-list.h"
12#include "dm-bio-record.h" 11#include "dm-bio-record.h"
13#include "dm-uevent.h" 12#include "dm-uevent.h"
@@ -20,6 +19,7 @@
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include <linux/time.h> 20#include <linux/time.h>
22#include <linux/workqueue.h> 21#include <linux/workqueue.h>
22#include <scsi/scsi_dh.h>
23#include <asm/atomic.h> 23#include <asm/atomic.h>
24 24
25#define DM_MSG_PREFIX "multipath" 25#define DM_MSG_PREFIX "multipath"
@@ -61,7 +61,8 @@ struct multipath {
61 61
62 spinlock_t lock; 62 spinlock_t lock;
63 63
64 struct hw_handler hw_handler; 64 const char *hw_handler_name;
65 struct work_struct activate_path;
65 unsigned nr_priority_groups; 66 unsigned nr_priority_groups;
66 struct list_head priority_groups; 67 struct list_head priority_groups;
67 unsigned pg_init_required; /* pg_init needs calling? */ 68 unsigned pg_init_required; /* pg_init needs calling? */
@@ -106,9 +107,10 @@ typedef int (*action_fn) (struct pgpath *pgpath);
106 107
107static struct kmem_cache *_mpio_cache; 108static struct kmem_cache *_mpio_cache;
108 109
109static struct workqueue_struct *kmultipathd; 110static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
110static void process_queued_ios(struct work_struct *work); 111static void process_queued_ios(struct work_struct *work);
111static void trigger_event(struct work_struct *work); 112static void trigger_event(struct work_struct *work);
113static void activate_path(struct work_struct *work);
112 114
113 115
114/*----------------------------------------------- 116/*-----------------------------------------------
@@ -178,6 +180,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
178 m->queue_io = 1; 180 m->queue_io = 1;
179 INIT_WORK(&m->process_queued_ios, process_queued_ios); 181 INIT_WORK(&m->process_queued_ios, process_queued_ios);
180 INIT_WORK(&m->trigger_event, trigger_event); 182 INIT_WORK(&m->trigger_event, trigger_event);
183 INIT_WORK(&m->activate_path, activate_path);
181 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); 184 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
182 if (!m->mpio_pool) { 185 if (!m->mpio_pool) {
183 kfree(m); 186 kfree(m);
@@ -193,18 +196,13 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
193static void free_multipath(struct multipath *m) 196static void free_multipath(struct multipath *m)
194{ 197{
195 struct priority_group *pg, *tmp; 198 struct priority_group *pg, *tmp;
196 struct hw_handler *hwh = &m->hw_handler;
197 199
198 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { 200 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
199 list_del(&pg->list); 201 list_del(&pg->list);
200 free_priority_group(pg, m->ti); 202 free_priority_group(pg, m->ti);
201 } 203 }
202 204
203 if (hwh->type) { 205 kfree(m->hw_handler_name);
204 hwh->type->destroy(hwh);
205 dm_put_hw_handler(hwh->type);
206 }
207
208 mempool_destroy(m->mpio_pool); 206 mempool_destroy(m->mpio_pool);
209 kfree(m); 207 kfree(m);
210} 208}
@@ -216,12 +214,10 @@ static void free_multipath(struct multipath *m)
216 214
217static void __switch_pg(struct multipath *m, struct pgpath *pgpath) 215static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
218{ 216{
219 struct hw_handler *hwh = &m->hw_handler;
220
221 m->current_pg = pgpath->pg; 217 m->current_pg = pgpath->pg;
222 218
223 /* Must we initialise the PG first, and queue I/O till it's ready? */ 219 /* Must we initialise the PG first, and queue I/O till it's ready? */
224 if (hwh->type && hwh->type->pg_init) { 220 if (m->hw_handler_name) {
225 m->pg_init_required = 1; 221 m->pg_init_required = 1;
226 m->queue_io = 1; 222 m->queue_io = 1;
227 } else { 223 } else {
@@ -409,7 +405,6 @@ static void process_queued_ios(struct work_struct *work)
409{ 405{
410 struct multipath *m = 406 struct multipath *m =
411 container_of(work, struct multipath, process_queued_ios); 407 container_of(work, struct multipath, process_queued_ios);
412 struct hw_handler *hwh = &m->hw_handler;
413 struct pgpath *pgpath = NULL; 408 struct pgpath *pgpath = NULL;
414 unsigned init_required = 0, must_queue = 1; 409 unsigned init_required = 0, must_queue = 1;
415 unsigned long flags; 410 unsigned long flags;
@@ -439,7 +434,7 @@ out:
439 spin_unlock_irqrestore(&m->lock, flags); 434 spin_unlock_irqrestore(&m->lock, flags);
440 435
441 if (init_required) 436 if (init_required)
442 hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path); 437 queue_work(kmpath_handlerd, &m->activate_path);
443 438
444 if (!must_queue) 439 if (!must_queue)
445 dispatch_queued_ios(m); 440 dispatch_queued_ios(m);
@@ -652,8 +647,6 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
652 647
653static int parse_hw_handler(struct arg_set *as, struct multipath *m) 648static int parse_hw_handler(struct arg_set *as, struct multipath *m)
654{ 649{
655 int r;
656 struct hw_handler_type *hwht;
657 unsigned hw_argc; 650 unsigned hw_argc;
658 struct dm_target *ti = m->ti; 651 struct dm_target *ti = m->ti;
659 652
@@ -661,30 +654,20 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
661 {0, 1024, "invalid number of hardware handler args"}, 654 {0, 1024, "invalid number of hardware handler args"},
662 }; 655 };
663 656
664 r = read_param(_params, shift(as), &hw_argc, &ti->error); 657 if (read_param(_params, shift(as), &hw_argc, &ti->error))
665 if (r)
666 return -EINVAL; 658 return -EINVAL;
667 659
668 if (!hw_argc) 660 if (!hw_argc)
669 return 0; 661 return 0;
670 662
671 hwht = dm_get_hw_handler(shift(as)); 663 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
672 if (!hwht) { 664 request_module("scsi_dh_%s", m->hw_handler_name);
665 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
673 ti->error = "unknown hardware handler type"; 666 ti->error = "unknown hardware handler type";
667 kfree(m->hw_handler_name);
668 m->hw_handler_name = NULL;
674 return -EINVAL; 669 return -EINVAL;
675 } 670 }
676
677 m->hw_handler.md = dm_table_get_md(ti->table);
678 dm_put(m->hw_handler.md);
679
680 r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
681 if (r) {
682 dm_put_hw_handler(hwht);
683 ti->error = "hardware handler constructor failed";
684 return r;
685 }
686
687 m->hw_handler.type = hwht;
688 consume(as, hw_argc - 1); 671 consume(as, hw_argc - 1);
689 672
690 return 0; 673 return 0;
@@ -808,6 +791,7 @@ static void multipath_dtr(struct dm_target *ti)
808{ 791{
809 struct multipath *m = (struct multipath *) ti->private; 792 struct multipath *m = (struct multipath *) ti->private;
810 793
794 flush_workqueue(kmpath_handlerd);
811 flush_workqueue(kmultipathd); 795 flush_workqueue(kmultipathd);
812 free_multipath(m); 796 free_multipath(m);
813} 797}
@@ -1025,52 +1009,85 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1025 return limit_reached; 1009 return limit_reached;
1026} 1010}
1027 1011
1028/* 1012static void pg_init_done(struct dm_path *path, int errors)
1029 * pg_init must call this when it has completed its initialisation
1030 */
1031void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
1032{ 1013{
1033 struct pgpath *pgpath = path_to_pgpath(path); 1014 struct pgpath *pgpath = path_to_pgpath(path);
1034 struct priority_group *pg = pgpath->pg; 1015 struct priority_group *pg = pgpath->pg;
1035 struct multipath *m = pg->m; 1016 struct multipath *m = pg->m;
1036 unsigned long flags; 1017 unsigned long flags;
1037 1018
1038 /* 1019 /* device or driver problems */
1039 * If requested, retry pg_init until maximum number of retries exceeded. 1020 switch (errors) {
1040 * If retry not requested and PG already bypassed, always fail the path. 1021 case SCSI_DH_OK:
1041 */ 1022 break;
1042 if (err_flags & MP_RETRY) { 1023 case SCSI_DH_NOSYS:
1043 if (pg_init_limit_reached(m, pgpath)) 1024 if (!m->hw_handler_name) {
1044 err_flags |= MP_FAIL_PATH; 1025 errors = 0;
1045 } else if (err_flags && pg->bypassed) 1026 break;
1046 err_flags |= MP_FAIL_PATH; 1027 }
1047 1028 DMERR("Cannot failover device because scsi_dh_%s was not "
1048 if (err_flags & MP_FAIL_PATH) 1029 "loaded.", m->hw_handler_name);
1030 /*
1031 * Fail path for now, so we do not ping pong
1032 */
1049 fail_path(pgpath); 1033 fail_path(pgpath);
1050 1034 break;
1051 if (err_flags & MP_BYPASS_PG) 1035 case SCSI_DH_DEV_TEMP_BUSY:
1036 /*
1037 * Probably doing something like FW upgrade on the
1038 * controller so try the other pg.
1039 */
1052 bypass_pg(m, pg, 1); 1040 bypass_pg(m, pg, 1);
1041 break;
1042 /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1043 case SCSI_DH_RETRY:
1044 case SCSI_DH_IMM_RETRY:
1045 case SCSI_DH_RES_TEMP_UNAVAIL:
1046 if (pg_init_limit_reached(m, pgpath))
1047 fail_path(pgpath);
1048 errors = 0;
1049 break;
1050 default:
1051 /*
1052 * We probably do not want to fail the path for a device
1053 * error, but this is what the old dm did. In future
1054 * patches we can do more advanced handling.
1055 */
1056 fail_path(pgpath);
1057 }
1053 1058
1054 spin_lock_irqsave(&m->lock, flags); 1059 spin_lock_irqsave(&m->lock, flags);
1055 if (err_flags & ~MP_RETRY) { 1060 if (errors) {
1061 DMERR("Could not failover device. Error %d.", errors);
1056 m->current_pgpath = NULL; 1062 m->current_pgpath = NULL;
1057 m->current_pg = NULL; 1063 m->current_pg = NULL;
1058 } else if (!m->pg_init_required) 1064 } else if (!m->pg_init_required) {
1059 m->queue_io = 0; 1065 m->queue_io = 0;
1066 pg->bypassed = 0;
1067 }
1060 1068
1061 m->pg_init_in_progress = 0; 1069 m->pg_init_in_progress = 0;
1062 queue_work(kmultipathd, &m->process_queued_ios); 1070 queue_work(kmultipathd, &m->process_queued_ios);
1063 spin_unlock_irqrestore(&m->lock, flags); 1071 spin_unlock_irqrestore(&m->lock, flags);
1064} 1072}
1065 1073
1074static void activate_path(struct work_struct *work)
1075{
1076 int ret;
1077 struct multipath *m =
1078 container_of(work, struct multipath, activate_path);
1079 struct dm_path *path = &m->current_pgpath->path;
1080
1081 ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
1082 pg_init_done(path, ret);
1083}
1084
1066/* 1085/*
1067 * end_io handling 1086 * end_io handling
1068 */ 1087 */
1069static int do_end_io(struct multipath *m, struct bio *bio, 1088static int do_end_io(struct multipath *m, struct bio *bio,
1070 int error, struct dm_mpath_io *mpio) 1089 int error, struct dm_mpath_io *mpio)
1071{ 1090{
1072 struct hw_handler *hwh = &m->hw_handler;
1073 unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
1074 unsigned long flags; 1091 unsigned long flags;
1075 1092
1076 if (!error) 1093 if (!error)
@@ -1097,19 +1114,8 @@ static int do_end_io(struct multipath *m, struct bio *bio,
1097 } 1114 }
1098 spin_unlock_irqrestore(&m->lock, flags); 1115 spin_unlock_irqrestore(&m->lock, flags);
1099 1116
1100 if (hwh->type && hwh->type->error) 1117 if (mpio->pgpath)
1101 err_flags = hwh->type->error(hwh, bio); 1118 fail_path(mpio->pgpath);
1102
1103 if (mpio->pgpath) {
1104 if (err_flags & MP_FAIL_PATH)
1105 fail_path(mpio->pgpath);
1106
1107 if (err_flags & MP_BYPASS_PG)
1108 bypass_pg(m, mpio->pgpath->pg, 1);
1109 }
1110
1111 if (err_flags & MP_ERROR_IO)
1112 return -EIO;
1113 1119
1114 requeue: 1120 requeue:
1115 dm_bio_restore(&mpio->details, bio); 1121 dm_bio_restore(&mpio->details, bio);
@@ -1194,7 +1200,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
1194 int sz = 0; 1200 int sz = 0;
1195 unsigned long flags; 1201 unsigned long flags;
1196 struct multipath *m = (struct multipath *) ti->private; 1202 struct multipath *m = (struct multipath *) ti->private;
1197 struct hw_handler *hwh = &m->hw_handler;
1198 struct priority_group *pg; 1203 struct priority_group *pg;
1199 struct pgpath *p; 1204 struct pgpath *p;
1200 unsigned pg_num; 1205 unsigned pg_num;
@@ -1214,12 +1219,10 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
1214 DMEMIT("pg_init_retries %u ", m->pg_init_retries); 1219 DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1215 } 1220 }
1216 1221
1217 if (hwh->type && hwh->type->status) 1222 if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1218 sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
1219 else if (!hwh->type || type == STATUSTYPE_INFO)
1220 DMEMIT("0 "); 1223 DMEMIT("0 ");
1221 else 1224 else
1222 DMEMIT("1 %s ", hwh->type->name); 1225 DMEMIT("1 %s ", m->hw_handler_name);
1223 1226
1224 DMEMIT("%u ", m->nr_priority_groups); 1227 DMEMIT("%u ", m->nr_priority_groups);
1225 1228
@@ -1422,6 +1425,21 @@ static int __init dm_multipath_init(void)
1422 return -ENOMEM; 1425 return -ENOMEM;
1423 } 1426 }
1424 1427
1428 /*
1429 * A separate workqueue is used to handle the device handlers
1430 * to avoid overloading existing workqueue. Overloading the
1431 * old workqueue would also create a bottleneck in the
1432 * path of the storage hardware device activation.
1433 */
1434 kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1435 if (!kmpath_handlerd) {
1436 DMERR("failed to create workqueue kmpath_handlerd");
1437 destroy_workqueue(kmultipathd);
1438 dm_unregister_target(&multipath_target);
1439 kmem_cache_destroy(_mpio_cache);
1440 return -ENOMEM;
1441 }
1442
1425 DMINFO("version %u.%u.%u loaded", 1443 DMINFO("version %u.%u.%u loaded",
1426 multipath_target.version[0], multipath_target.version[1], 1444 multipath_target.version[0], multipath_target.version[1],
1427 multipath_target.version[2]); 1445 multipath_target.version[2]);
@@ -1433,6 +1451,7 @@ static void __exit dm_multipath_exit(void)
1433{ 1451{
1434 int r; 1452 int r;
1435 1453
1454 destroy_workqueue(kmpath_handlerd);
1436 destroy_workqueue(kmultipathd); 1455 destroy_workqueue(kmultipathd);
1437 1456
1438 r = dm_unregister_target(&multipath_target); 1457 r = dm_unregister_target(&multipath_target);
@@ -1441,8 +1460,6 @@ static void __exit dm_multipath_exit(void)
1441 kmem_cache_destroy(_mpio_cache); 1460 kmem_cache_destroy(_mpio_cache);
1442} 1461}
1443 1462
1444EXPORT_SYMBOL_GPL(dm_pg_init_complete);
1445
1446module_init(dm_multipath_init); 1463module_init(dm_multipath_init);
1447module_exit(dm_multipath_exit); 1464module_exit(dm_multipath_exit);
1448 1465
diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h
index b9cdcbb3ed59..c198b856a452 100644
--- a/drivers/md/dm-mpath.h
+++ b/drivers/md/dm-mpath.h
@@ -16,7 +16,6 @@ struct dm_path {
16 unsigned is_active; /* Read-only */ 16 unsigned is_active; /* Read-only */
17 17
18 void *pscontext; /* For path-selector use */ 18 void *pscontext; /* For path-selector use */
19 void *hwhcontext; /* For hw-handler use */
20}; 19};
21 20
22/* Callback for hwh_pg_init_fn to use when complete */ 21/* Callback for hwh_pg_init_fn to use when complete */
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 1cafaa959443..b1eebf88c209 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
50/** 50/**
51 * linear_mergeable_bvec -- tell bio layer if two requests can be merged 51 * linear_mergeable_bvec -- tell bio layer if two requests can be merged
52 * @q: request queue 52 * @q: request queue
53 * @bio: the buffer head that's been built up so far 53 * @bvm: properties of new bio
54 * @biovec: the request that could be merged to it. 54 * @biovec: the request that could be merged to it.
55 * 55 *
56 * Return amount of bytes we can take at this offset 56 * Return amount of bytes we can take at this offset
57 */ 57 */
58static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 58static int linear_mergeable_bvec(struct request_queue *q,
59 struct bvec_merge_data *bvm,
60 struct bio_vec *biovec)
59{ 61{
60 mddev_t *mddev = q->queuedata; 62 mddev_t *mddev = q->queuedata;
61 dev_info_t *dev0; 63 dev_info_t *dev0;
62 unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; 64 unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
63 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 65 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
64 66
65 dev0 = which_dev(mddev, sector); 67 dev0 = which_dev(mddev, sector);
66 maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); 68 maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 2f30ebd8b7ab..183610635661 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev)
241/** 241/**
242 * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged 242 * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
243 * @q: request queue 243 * @q: request queue
244 * @bio: the buffer head that's been built up so far 244 * @bvm: properties of new bio
245 * @biovec: the request that could be merged to it. 245 * @biovec: the request that could be merged to it.
246 * 246 *
247 * Return amount of bytes we can accept at this offset 247 * Return amount of bytes we can accept at this offset
248 */ 248 */
249static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 249static int raid0_mergeable_bvec(struct request_queue *q,
250 struct bvec_merge_data *bvm,
251 struct bio_vec *biovec)
250{ 252{
251 mddev_t *mddev = q->queuedata; 253 mddev_t *mddev = q->queuedata;
252 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 254 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
253 int max; 255 int max;
254 unsigned int chunk_sectors = mddev->chunk_size >> 9; 256 unsigned int chunk_sectors = mddev->chunk_size >> 9;
255 unsigned int bio_sectors = bio->bi_size >> 9; 257 unsigned int bio_sectors = bvm->bi_size >> 9;
256 258
257 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 259 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
258 if (max < 0) max = 0; /* bio_add cannot handle a negative return */ 260 if (max < 0) max = 0; /* bio_add cannot handle a negative return */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2acea4025243..159535d73567 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -439,26 +439,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
439/** 439/**
440 * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged 440 * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
441 * @q: request queue 441 * @q: request queue
442 * @bio: the buffer head that's been built up so far 442 * @bvm: properties of new bio
443 * @biovec: the request that could be merged to it. 443 * @biovec: the request that could be merged to it.
444 * 444 *
445 * Return amount of bytes we can accept at this offset 445 * Return amount of bytes we can accept at this offset
446 * If near_copies == raid_disk, there are no striping issues, 446 * If near_copies == raid_disk, there are no striping issues,
447 * but in that case, the function isn't called at all. 447 * but in that case, the function isn't called at all.
448 */ 448 */
449static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio, 449static int raid10_mergeable_bvec(struct request_queue *q,
450 struct bio_vec *bio_vec) 450 struct bvec_merge_data *bvm,
451 struct bio_vec *biovec)
451{ 452{
452 mddev_t *mddev = q->queuedata; 453 mddev_t *mddev = q->queuedata;
453 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 454 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
454 int max; 455 int max;
455 unsigned int chunk_sectors = mddev->chunk_size >> 9; 456 unsigned int chunk_sectors = mddev->chunk_size >> 9;
456 unsigned int bio_sectors = bio->bi_size >> 9; 457 unsigned int bio_sectors = bvm->bi_size >> 9;
457 458
458 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 459 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
459 if (max < 0) max = 0; /* bio_add cannot handle a negative return */ 460 if (max < 0) max = 0; /* bio_add cannot handle a negative return */
460 if (max <= bio_vec->bv_len && bio_sectors == 0) 461 if (max <= biovec->bv_len && bio_sectors == 0)
461 return bio_vec->bv_len; 462 return biovec->bv_len;
462 else 463 else
463 return max; 464 return max;
464} 465}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 42a480ba767b..55e7c56045a0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3073,15 +3073,17 @@ static int raid5_congested(void *data, int bits)
3073/* We want read requests to align with chunks where possible, 3073/* We want read requests to align with chunks where possible,
3074 * but write requests don't need to. 3074 * but write requests don't need to.
3075 */ 3075 */
3076static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 3076static int raid5_mergeable_bvec(struct request_queue *q,
3077 struct bvec_merge_data *bvm,
3078 struct bio_vec *biovec)
3077{ 3079{
3078 mddev_t *mddev = q->queuedata; 3080 mddev_t *mddev = q->queuedata;
3079 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 3081 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
3080 int max; 3082 int max;
3081 unsigned int chunk_sectors = mddev->chunk_size >> 9; 3083 unsigned int chunk_sectors = mddev->chunk_size >> 9;
3082 unsigned int bio_sectors = bio->bi_size >> 9; 3084 unsigned int bio_sectors = bvm->bi_size >> 9;
3083 3085
3084 if (bio_data_dir(bio) == WRITE) 3086 if ((bvm->bi_rw & 1) == WRITE)
3085 return biovec->bv_len; /* always allow writes to be mergeable */ 3087 return biovec->bv_len; /* always allow writes to be mergeable */
3086 3088
3087 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 3089 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;