diff options
-rw-r--r-- | Documentation/filesystems/nfs/pnfs-scsi-server.txt | 23 | ||||
-rw-r--r-- | fs/nfsd/Kconfig | 13 | ||||
-rw-r--r-- | fs/nfsd/Makefile | 1 | ||||
-rw-r--r-- | fs/nfsd/blocklayout.c | 208 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.c | 65 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.h | 14 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 27 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 6 | ||||
-rw-r--r-- | fs/nfsd/pnfs.h | 6 | ||||
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_pnfs.h | 2 |
11 files changed, 358 insertions, 8 deletions
diff --git a/Documentation/filesystems/nfs/pnfs-scsi-server.txt b/Documentation/filesystems/nfs/pnfs-scsi-server.txt new file mode 100644 index 000000000000..5bef7268bd9f --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs-scsi-server.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | |||
2 | pNFS SCSI layout server user guide | ||
3 | ================================== | ||
4 | |||
5 | This document describes support for pNFS SCSI layouts in the Linux NFS server. | ||
6 | With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS, | ||
7 | which in addition to handling all the metadata access to the NFS export, | ||
8 | also hands out layouts to the clients so that they can directly access the | ||
9 | underlying SCSI LUNs that are shared with the client. | ||
10 | |||
11 | To use pNFS SCSI layouts with the Linux NFS server, the exported file | ||
12 | system needs to support the pNFS SCSI layouts (currently just XFS), and the | ||
13 | file system must sit on a SCSI LUN that is accessible to the clients in | ||
14 | addition to the MDS. As of now the file system needs to sit directly on the | ||
15 | exported LUN; striping or concatenation of LUNs on the MDS and clients | ||
16 | is not supported yet. | ||
17 | |||
18 | On a server built with CONFIG_NFSD_SCSILAYOUT, the pNFS SCSI volume support is | ||
19 | automatically enabled if the file system is exported using the "pnfs" | ||
20 | option and the underlying SCSI device supports persistent reservations. | ||
21 | On the client make sure the kernel has the CONFIG_PNFS_BLOCK option | ||
22 | enabled, and the file system is mounted using the NFSv4.1 protocol | ||
23 | version (mount -o vers=4.1). | ||
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index eb70d91b255b..a30a31316e68 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -98,6 +98,19 @@ config NFSD_BLOCKLAYOUT | |||
98 | 98 | ||
99 | If unsure, say N. | 99 | If unsure, say N. |
100 | 100 | ||
101 | config NFSD_SCSILAYOUT | ||
102 | bool "NFSv4.1 server support for pNFS SCSI layouts" | ||
103 | depends on NFSD_V4 | ||
104 | select NFSD_PNFS | ||
105 | help | ||
106 | This option enables support for exporting pNFS SCSI layouts | ||
107 | in the kernel's NFS server. The pNFS SCSI layout enables NFS | ||
108 | clients to directly perform I/O to SCSI devices accessible to both | ||
109 | the server and the clients. See draft-ietf-nfsv4-scsi-layout for | ||
110 | more details. | ||
111 | |||
112 | If unsure, say N. | ||
113 | |||
101 | config NFSD_V4_SECURITY_LABEL | 114 | config NFSD_V4_SECURITY_LABEL |
102 | bool "Provide Security Label support for NFSv4 server" | 115 | bool "Provide Security Label support for NFSv4 server" |
103 | depends on NFSD_V4 && SECURITY | 116 | depends on NFSD_V4 && SECURITY |
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 679cdc6efee8..3ae5f3c77e28 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile | |||
@@ -19,3 +19,4 @@ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ | |||
19 | nfs4acl.o nfs4callback.o nfs4recover.o | 19 | nfs4acl.o nfs4callback.o nfs4recover.o |
20 | nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o | 20 | nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o |
21 | nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o | 21 | nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o |
22 | nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o | ||
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index f00fa918e0f6..e55b5242614d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c | |||
@@ -1,11 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014 Christoph Hellwig. | 2 | * Copyright (c) 2014-2016 Christoph Hellwig. |
3 | */ | 3 | */ |
4 | #include <linux/exportfs.h> | 4 | #include <linux/exportfs.h> |
5 | #include <linux/genhd.h> | 5 | #include <linux/genhd.h> |
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <linux/pr.h> | ||
7 | 8 | ||
8 | #include <linux/nfsd/debug.h> | 9 | #include <linux/nfsd/debug.h> |
10 | #include <scsi/scsi_proto.h> | ||
11 | #include <scsi/scsi_common.h> | ||
9 | 12 | ||
10 | #include "blocklayoutxdr.h" | 13 | #include "blocklayoutxdr.h" |
11 | #include "pnfs.h" | 14 | #include "pnfs.h" |
@@ -159,6 +162,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb, | |||
159 | 162 | ||
160 | static __be32 | 163 | static __be32 |
161 | nfsd4_block_proc_getdeviceinfo(struct super_block *sb, | 164 | nfsd4_block_proc_getdeviceinfo(struct super_block *sb, |
165 | struct nfs4_client *clp, | ||
162 | struct nfsd4_getdeviceinfo *gdp) | 166 | struct nfsd4_getdeviceinfo *gdp) |
163 | { | 167 | { |
164 | if (sb->s_bdev != sb->s_bdev->bd_contains) | 168 | if (sb->s_bdev != sb->s_bdev->bd_contains) |
@@ -200,3 +204,205 @@ const struct nfsd4_layout_ops bl_layout_ops = { | |||
200 | .proc_layoutcommit = nfsd4_block_proc_layoutcommit, | 204 | .proc_layoutcommit = nfsd4_block_proc_layoutcommit, |
201 | }; | 205 | }; |
202 | #endif /* CONFIG_NFSD_BLOCKLAYOUT */ | 206 | #endif /* CONFIG_NFSD_BLOCKLAYOUT */ |
207 | |||
208 | #ifdef CONFIG_NFSD_SCSILAYOUT | ||
209 | static int nfsd4_scsi_identify_device(struct block_device *bdev, | ||
210 | struct pnfs_block_volume *b) | ||
211 | { | ||
212 | struct request_queue *q = bdev->bd_disk->queue; | ||
213 | struct request *rq; | ||
214 | size_t bufflen = 252, len, id_len; | ||
215 | u8 *buf, *d, type, assoc; | ||
216 | int error; | ||
217 | |||
218 | buf = kzalloc(bufflen, GFP_KERNEL); | ||
219 | if (!buf) | ||
220 | return -ENOMEM; | ||
221 | |||
222 | rq = blk_get_request(q, READ, GFP_KERNEL); | ||
223 | if (IS_ERR(rq)) { | ||
224 | error = -ENOMEM; | ||
225 | goto out_free_buf; | ||
226 | } | ||
227 | blk_rq_set_block_pc(rq); | ||
228 | |||
229 | error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); | ||
230 | if (error) | ||
231 | goto out_put_request; | ||
232 | |||
233 | rq->cmd[0] = INQUIRY; | ||
234 | rq->cmd[1] = 1; | ||
235 | rq->cmd[2] = 0x83; | ||
236 | rq->cmd[3] = bufflen >> 8; | ||
237 | rq->cmd[4] = bufflen & 0xff; | ||
238 | rq->cmd_len = COMMAND_SIZE(INQUIRY); | ||
239 | |||
240 | error = blk_execute_rq(rq->q, NULL, rq, 1); | ||
241 | if (error) { | ||
242 | pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", | ||
243 | rq->errors); | ||
244 | goto out_put_request; | ||
245 | } | ||
246 | |||
247 | len = (buf[2] << 8) + buf[3] + 4; | ||
248 | if (len > bufflen) { | ||
249 | pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", | ||
250 | len); | ||
251 | goto out_put_request; | ||
252 | } | ||
253 | |||
254 | d = buf + 4; | ||
255 | for (d = buf + 4; d < buf + len; d += id_len + 4) { | ||
256 | id_len = d[3]; | ||
257 | type = d[1] & 0xf; | ||
258 | assoc = (d[1] >> 4) & 0x3; | ||
259 | |||
260 | /* | ||
261 | * We only care about a EUI-64 and NAA designator types | ||
262 | * with LU association. | ||
263 | */ | ||
264 | if (assoc != 0x00) | ||
265 | continue; | ||
266 | if (type != 0x02 && type != 0x03) | ||
267 | continue; | ||
268 | if (id_len != 8 && id_len != 12 && id_len != 16) | ||
269 | continue; | ||
270 | |||
271 | b->scsi.code_set = PS_CODE_SET_BINARY; | ||
272 | b->scsi.designator_type = type == 0x02 ? | ||
273 | PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; | ||
274 | b->scsi.designator_len = id_len; | ||
275 | memcpy(b->scsi.designator, d + 4, id_len); | ||
276 | |||
277 | /* | ||
278 | * If we found a 8 or 12 byte descriptor continue on to | ||
279 | * see if a 16 byte one is available. If we find a | ||
280 | * 16 byte descriptor we're done. | ||
281 | */ | ||
282 | if (id_len == 16) | ||
283 | break; | ||
284 | } | ||
285 | |||
286 | out_put_request: | ||
287 | blk_put_request(rq); | ||
288 | out_free_buf: | ||
289 | kfree(buf); | ||
290 | return error; | ||
291 | } | ||
292 | |||
293 | #define NFSD_MDS_PR_KEY 0x0100000000000000 | ||
294 | |||
295 | /* | ||
296 | * We use the client ID as a unique key for the reservations. | ||
297 | * This allows us to easily fence a client when recalls fail. | ||
298 | */ | ||
299 | static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) | ||
300 | { | ||
301 | return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; | ||
302 | } | ||
303 | |||
304 | static int | ||
305 | nfsd4_block_get_device_info_scsi(struct super_block *sb, | ||
306 | struct nfs4_client *clp, | ||
307 | struct nfsd4_getdeviceinfo *gdp) | ||
308 | { | ||
309 | struct pnfs_block_deviceaddr *dev; | ||
310 | struct pnfs_block_volume *b; | ||
311 | const struct pr_ops *ops; | ||
312 | int error; | ||
313 | |||
314 | dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + | ||
315 | sizeof(struct pnfs_block_volume), GFP_KERNEL); | ||
316 | if (!dev) | ||
317 | return -ENOMEM; | ||
318 | gdp->gd_device = dev; | ||
319 | |||
320 | dev->nr_volumes = 1; | ||
321 | b = &dev->volumes[0]; | ||
322 | |||
323 | b->type = PNFS_BLOCK_VOLUME_SCSI; | ||
324 | b->scsi.pr_key = nfsd4_scsi_pr_key(clp); | ||
325 | |||
326 | error = nfsd4_scsi_identify_device(sb->s_bdev, b); | ||
327 | if (error) | ||
328 | return error; | ||
329 | |||
330 | ops = sb->s_bdev->bd_disk->fops->pr_ops; | ||
331 | if (!ops) { | ||
332 | pr_err("pNFS: device %s does not support PRs.\n", | ||
333 | sb->s_id); | ||
334 | return -EINVAL; | ||
335 | } | ||
336 | |||
337 | error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); | ||
338 | if (error) { | ||
339 | pr_err("pNFS: failed to register key for device %s.\n", | ||
340 | sb->s_id); | ||
341 | return -EINVAL; | ||
342 | } | ||
343 | |||
344 | error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, | ||
345 | PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); | ||
346 | if (error) { | ||
347 | pr_err("pNFS: failed to reserve device %s.\n", | ||
348 | sb->s_id); | ||
349 | return -EINVAL; | ||
350 | } | ||
351 | |||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | static __be32 | ||
356 | nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, | ||
357 | struct nfs4_client *clp, | ||
358 | struct nfsd4_getdeviceinfo *gdp) | ||
359 | { | ||
360 | if (sb->s_bdev != sb->s_bdev->bd_contains) | ||
361 | return nfserr_inval; | ||
362 | return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); | ||
363 | } | ||
364 | static __be32 | ||
365 | nfsd4_scsi_proc_layoutcommit(struct inode *inode, | ||
366 | struct nfsd4_layoutcommit *lcp) | ||
367 | { | ||
368 | struct iomap *iomaps; | ||
369 | int nr_iomaps; | ||
370 | |||
371 | nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, | ||
372 | lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); | ||
373 | if (nr_iomaps < 0) | ||
374 | return nfserrno(nr_iomaps); | ||
375 | |||
376 | return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); | ||
377 | } | ||
378 | |||
379 | static void | ||
380 | nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) | ||
381 | { | ||
382 | struct nfs4_client *clp = ls->ls_stid.sc_client; | ||
383 | struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev; | ||
384 | |||
385 | bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, | ||
386 | nfsd4_scsi_pr_key(clp), 0, true); | ||
387 | } | ||
388 | |||
389 | const struct nfsd4_layout_ops scsi_layout_ops = { | ||
390 | /* | ||
391 | * Pretend that we send notification to the client. This is a blatant | ||
392 | * lie to force recent Linux clients to cache our device IDs. | ||
393 | * We rarely ever change the device ID, so the harm of leaking deviceids | ||
394 | * for a while isn't too bad. Unfortunately RFC5661 is a complete mess | ||
395 | * in this regard, but I filed errata 4119 for this a while ago, and | ||
396 | * hopefully the Linux client will eventually start caching deviceids | ||
397 | * without this again. | ||
398 | */ | ||
399 | .notify_types = | ||
400 | NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, | ||
401 | .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo, | ||
402 | .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, | ||
403 | .proc_layoutget = nfsd4_block_proc_layoutget, | ||
404 | .encode_layoutget = nfsd4_block_encode_layoutget, | ||
405 | .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit, | ||
406 | .fence_client = nfsd4_scsi_fence_client, | ||
407 | }; | ||
408 | #endif /* CONFIG_NFSD_SCSILAYOUT */ | ||
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 6d834dc9bbc8..ca1883668810 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014 Christoph Hellwig. | 2 | * Copyright (c) 2014-2016 Christoph Hellwig. |
3 | */ | 3 | */ |
4 | #include <linux/sunrpc/svc.h> | 4 | #include <linux/sunrpc/svc.h> |
5 | #include <linux/exportfs.h> | 5 | #include <linux/exportfs.h> |
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | |||
53 | p = xdr_encode_hyper(p, b->simple.offset); | 53 | p = xdr_encode_hyper(p, b->simple.offset); |
54 | p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); | 54 | p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); |
55 | break; | 55 | break; |
56 | case PNFS_BLOCK_VOLUME_SCSI: | ||
57 | len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8; | ||
58 | p = xdr_reserve_space(xdr, len); | ||
59 | if (!p) | ||
60 | return -ETOOSMALL; | ||
61 | |||
62 | *p++ = cpu_to_be32(b->type); | ||
63 | *p++ = cpu_to_be32(b->scsi.code_set); | ||
64 | *p++ = cpu_to_be32(b->scsi.designator_type); | ||
65 | p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len); | ||
66 | p = xdr_encode_hyper(p, b->scsi.pr_key); | ||
67 | break; | ||
56 | default: | 68 | default: |
57 | return -ENOTSUPP; | 69 | return -ENOTSUPP; |
58 | } | 70 | } |
@@ -155,3 +167,54 @@ fail: | |||
155 | kfree(iomaps); | 167 | kfree(iomaps); |
156 | return -EINVAL; | 168 | return -EINVAL; |
157 | } | 169 | } |
170 | |||
171 | int | ||
172 | nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
173 | u32 block_size) | ||
174 | { | ||
175 | struct iomap *iomaps; | ||
176 | u32 nr_iomaps, expected, i; | ||
177 | |||
178 | if (len < sizeof(u32)) { | ||
179 | dprintk("%s: extent array too small: %u\n", __func__, len); | ||
180 | return -EINVAL; | ||
181 | } | ||
182 | |||
183 | nr_iomaps = be32_to_cpup(p++); | ||
184 | expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; | ||
185 | if (len != expected) { | ||
186 | dprintk("%s: extent array size mismatch: %u/%u\n", | ||
187 | __func__, len, expected); | ||
188 | return -EINVAL; | ||
189 | } | ||
190 | |||
191 | iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); | ||
192 | if (!iomaps) { | ||
193 | dprintk("%s: failed to allocate extent array\n", __func__); | ||
194 | return -ENOMEM; | ||
195 | } | ||
196 | |||
197 | for (i = 0; i < nr_iomaps; i++) { | ||
198 | u64 val; | ||
199 | |||
200 | p = xdr_decode_hyper(p, &val); | ||
201 | if (val & (block_size - 1)) { | ||
202 | dprintk("%s: unaligned offset 0x%llx\n", __func__, val); | ||
203 | goto fail; | ||
204 | } | ||
205 | iomaps[i].offset = val; | ||
206 | |||
207 | p = xdr_decode_hyper(p, &val); | ||
208 | if (val & (block_size - 1)) { | ||
209 | dprintk("%s: unaligned length 0x%llx\n", __func__, val); | ||
210 | goto fail; | ||
211 | } | ||
212 | iomaps[i].length = val; | ||
213 | } | ||
214 | |||
215 | *iomapp = iomaps; | ||
216 | return nr_iomaps; | ||
217 | fail: | ||
218 | kfree(iomaps); | ||
219 | return -EINVAL; | ||
220 | } | ||
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index 6de925fe8499..397bc7563a49 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h | |||
@@ -15,6 +15,11 @@ struct pnfs_block_extent { | |||
15 | enum pnfs_block_extent_state es; | 15 | enum pnfs_block_extent_state es; |
16 | }; | 16 | }; |
17 | 17 | ||
18 | struct pnfs_block_range { | ||
19 | u64 foff; | ||
20 | u64 len; | ||
21 | }; | ||
22 | |||
18 | /* | 23 | /* |
19 | * Random upper cap for the uuid length to avoid unbounded allocation. | 24 | * Random upper cap for the uuid length to avoid unbounded allocation. |
20 | * Not actually limited by the protocol. | 25 | * Not actually limited by the protocol. |
@@ -29,6 +34,13 @@ struct pnfs_block_volume { | |||
29 | u32 sig_len; | 34 | u32 sig_len; |
30 | u8 sig[PNFS_BLOCK_UUID_LEN]; | 35 | u8 sig[PNFS_BLOCK_UUID_LEN]; |
31 | } simple; | 36 | } simple; |
37 | struct { | ||
38 | enum scsi_code_set code_set; | ||
39 | enum scsi_designator_type designator_type; | ||
40 | int designator_len; | ||
41 | u8 designator[256]; | ||
42 | u64 pr_key; | ||
43 | } scsi; | ||
32 | }; | 44 | }; |
33 | }; | 45 | }; |
34 | 46 | ||
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, | |||
43 | struct nfsd4_layoutget *lgp); | 55 | struct nfsd4_layoutget *lgp); |
44 | int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | 56 | int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, |
45 | u32 block_size); | 57 | u32 block_size); |
58 | int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
59 | u32 block_size); | ||
46 | 60 | ||
47 | #endif /* _NFSD_BLOCKLAYOUTXDR_H */ | 61 | #endif /* _NFSD_BLOCKLAYOUTXDR_H */ |
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 4e4def77ebc7..825c7bc8d789 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014 Christoph Hellwig. | 2 | * Copyright (c) 2014 Christoph Hellwig. |
3 | */ | 3 | */ |
4 | #include <linux/blkdev.h> | ||
4 | #include <linux/kmod.h> | 5 | #include <linux/kmod.h> |
5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
6 | #include <linux/jhash.h> | 7 | #include <linux/jhash.h> |
@@ -29,6 +30,9 @@ const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { | |||
29 | #ifdef CONFIG_NFSD_BLOCKLAYOUT | 30 | #ifdef CONFIG_NFSD_BLOCKLAYOUT |
30 | [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, | 31 | [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, |
31 | #endif | 32 | #endif |
33 | #ifdef CONFIG_NFSD_SCSILAYOUT | ||
34 | [LAYOUT_SCSI] = &scsi_layout_ops, | ||
35 | #endif | ||
32 | }; | 36 | }; |
33 | 37 | ||
34 | /* pNFS device ID to export fsid mapping */ | 38 | /* pNFS device ID to export fsid mapping */ |
@@ -123,12 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp) | |||
123 | if (!(exp->ex_flags & NFSEXP_PNFS)) | 127 | if (!(exp->ex_flags & NFSEXP_PNFS)) |
124 | return; | 128 | return; |
125 | 129 | ||
130 | /* | ||
131 | * Check if the file system supports exporting a block-like layout. | ||
132 | * If the block device supports reservations prefer the SCSI layout, | ||
133 | * otherwise advertise the block layout. | ||
134 | */ | ||
126 | #ifdef CONFIG_NFSD_BLOCKLAYOUT | 135 | #ifdef CONFIG_NFSD_BLOCKLAYOUT |
127 | if (sb->s_export_op->get_uuid && | 136 | if (sb->s_export_op->get_uuid && |
128 | sb->s_export_op->map_blocks && | 137 | sb->s_export_op->map_blocks && |
129 | sb->s_export_op->commit_blocks) | 138 | sb->s_export_op->commit_blocks) |
130 | exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; | 139 | exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; |
131 | #endif | 140 | #endif |
141 | #ifdef CONFIG_NFSD_SCSILAYOUT | ||
142 | /* overwrite block layout selection if needed */ | ||
143 | if (sb->s_export_op->map_blocks && | ||
144 | sb->s_export_op->commit_blocks && | ||
145 | sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) | ||
146 | exp->ex_layout_type = LAYOUT_SCSI; | ||
147 | #endif | ||
132 | } | 148 | } |
133 | 149 | ||
134 | static void | 150 | static void |
@@ -594,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) | |||
594 | 610 | ||
595 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); | 611 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); |
596 | 612 | ||
597 | trace_layout_recall_fail(&ls->ls_stid.sc_stateid); | ||
598 | |||
599 | printk(KERN_WARNING | 613 | printk(KERN_WARNING |
600 | "nfsd: client %s failed to respond to layout recall. " | 614 | "nfsd: client %s failed to respond to layout recall. " |
601 | " Fencing..\n", addr_str); | 615 | " Fencing..\n", addr_str); |
@@ -630,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) | |||
630 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | 644 | container_of(cb, struct nfs4_layout_stateid, ls_recall); |
631 | struct nfsd_net *nn; | 645 | struct nfsd_net *nn; |
632 | ktime_t now, cutoff; | 646 | ktime_t now, cutoff; |
647 | const struct nfsd4_layout_ops *ops; | ||
633 | LIST_HEAD(reaplist); | 648 | LIST_HEAD(reaplist); |
634 | 649 | ||
635 | 650 | ||
@@ -665,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) | |||
665 | /* | 680 | /* |
666 | * Unknown error or non-responding client, we'll need to fence. | 681 | * Unknown error or non-responding client, we'll need to fence. |
667 | */ | 682 | */ |
668 | nfsd4_cb_layout_fail(ls); | 683 | trace_layout_recall_fail(&ls->ls_stid.sc_stateid); |
684 | |||
685 | ops = nfsd4_layout_ops[ls->ls_layout_type]; | ||
686 | if (ops->fence_client) | ||
687 | ops->fence_client(ls); | ||
688 | else | ||
689 | nfsd4_cb_layout_fail(ls); | ||
669 | return -1; | 690 | return -1; |
670 | } | 691 | } |
671 | } | 692 | } |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 40b912407d51..de1ff1d98bb1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1268,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, | |||
1268 | goto out; | 1268 | goto out; |
1269 | 1269 | ||
1270 | nfserr = nfs_ok; | 1270 | nfserr = nfs_ok; |
1271 | if (gdp->gd_maxcount != 0) | 1271 | if (gdp->gd_maxcount != 0) { |
1272 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); | 1272 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, |
1273 | cstate->session->se_client, gdp); | ||
1274 | } | ||
1273 | 1275 | ||
1274 | gdp->gd_notify_types &= ops->notify_types; | 1276 | gdp->gd_notify_types &= ops->notify_types; |
1275 | out: | 1277 | out: |
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index ff50bfa1f76f..7d073b9b1553 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h | |||
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops { | |||
21 | u32 notify_types; | 21 | u32 notify_types; |
22 | 22 | ||
23 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, | 23 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, |
24 | struct nfs4_client *clp, | ||
24 | struct nfsd4_getdeviceinfo *gdevp); | 25 | struct nfsd4_getdeviceinfo *gdevp); |
25 | __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, | 26 | __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, |
26 | struct nfsd4_getdeviceinfo *gdevp); | 27 | struct nfsd4_getdeviceinfo *gdevp); |
@@ -32,12 +33,17 @@ struct nfsd4_layout_ops { | |||
32 | 33 | ||
33 | __be32 (*proc_layoutcommit)(struct inode *inode, | 34 | __be32 (*proc_layoutcommit)(struct inode *inode, |
34 | struct nfsd4_layoutcommit *lcp); | 35 | struct nfsd4_layoutcommit *lcp); |
36 | |||
37 | void (*fence_client)(struct nfs4_layout_stateid *ls); | ||
35 | }; | 38 | }; |
36 | 39 | ||
37 | extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; | 40 | extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; |
38 | #ifdef CONFIG_NFSD_BLOCKLAYOUT | 41 | #ifdef CONFIG_NFSD_BLOCKLAYOUT |
39 | extern const struct nfsd4_layout_ops bl_layout_ops; | 42 | extern const struct nfsd4_layout_ops bl_layout_ops; |
40 | #endif | 43 | #endif |
44 | #ifdef CONFIG_NFSD_SCSILAYOUT | ||
45 | extern const struct nfsd4_layout_ops scsi_layout_ops; | ||
46 | #endif | ||
41 | 47 | ||
42 | __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, | 48 | __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, |
43 | struct nfsd4_compound_state *cstate, stateid_t *stateid, | 49 | struct nfsd4_compound_state *cstate, stateid_t *stateid, |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d68b62a76592..3542d94fddce 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -122,3 +122,4 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | |||
122 | xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o | 122 | xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o |
123 | xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o | 123 | xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o |
124 | xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o | 124 | xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o |
125 | xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o | ||
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index d85529ca299e..93f74853961b 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _XFS_PNFS_H | 1 | #ifndef _XFS_PNFS_H |
2 | #define _XFS_PNFS_H 1 | 2 | #define _XFS_PNFS_H 1 |
3 | 3 | ||
4 | #ifdef CONFIG_NFSD_BLOCKLAYOUT | 4 | #if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT) |
5 | int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); | 5 | int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); |
6 | int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, | 6 | int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, |
7 | struct iomap *iomap, bool write, u32 *device_generation); | 7 | struct iomap *iomap, bool write, u32 *device_generation); |