aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2016-03-04 14:46:15 -0500
committerJ. Bruce Fields <bfields@redhat.com>2016-03-18 11:38:17 -0400
commitd9186c03976506cde2c2b1219028bed449c948ed (patch)
tree2d4bb391919038cc05f167f0f82e679fc8fe655f
parent40cf446b9482bd2c681b60062b34cc47c78342f8 (diff)
nfs/blocklayout: add SCSI layout support
This is a trivial extension to the block layout driver to support the new SCSI layouts draft. There are three changes: - device identification through the SCSI VPD page. This allows us to directly use the udev generated persistent device names instead of requiring an expensive lookup by crawling every block device node in /dev and reading a signature for it. - use of SCSI persistent reservations to protect device access and allow for robust fencing. On the client side this just means registering and unregistering a server supplied key. - an optimized LAYOUTCOMMIT payload that doesn't send unnecessary fields to the server. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Trond Myklebust <trond.myklebust@primarydata.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
-rw-r--r--fs/nfs/blocklayout/blocklayout.c59
-rw-r--r--fs/nfs/blocklayout/blocklayout.h14
-rw-r--r--fs/nfs/blocklayout/dev.c144
-rw-r--r--fs/nfs/blocklayout/extent_tree.c44
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
5 files changed, 238 insertions, 25 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138f410c..b27c409b2f8e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
446 kfree(bl); 446 kfree(bl);
447} 447}
448 448
449static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 449static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
450 gfp_t gfp_flags) 450 gfp_t gfp_flags, bool is_scsi_layout)
451{ 451{
452 struct pnfs_block_layout *bl; 452 struct pnfs_block_layout *bl;
453 453
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
460 bl->bl_ext_ro = RB_ROOT; 460 bl->bl_ext_ro = RB_ROOT;
461 spin_lock_init(&bl->bl_ext_lock); 461 spin_lock_init(&bl->bl_ext_lock);
462 462
463 bl->bl_scsi_layout = is_scsi_layout;
463 return &bl->bl_layout; 464 return &bl->bl_layout;
464} 465}
465 466
467static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
468 gfp_t gfp_flags)
469{
470 return __bl_alloc_layout_hdr(inode, gfp_flags, false);
471}
472
473static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
474 gfp_t gfp_flags)
475{
476 return __bl_alloc_layout_hdr(inode, gfp_flags, true);
477}
478
466static void bl_free_lseg(struct pnfs_layout_segment *lseg) 479static void bl_free_lseg(struct pnfs_layout_segment *lseg)
467{ 480{
468 dprintk("%s enter\n", __func__); 481 dprintk("%s enter\n", __func__);
@@ -888,22 +901,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
888 .sync = pnfs_generic_sync, 901 .sync = pnfs_generic_sync,
889}; 902};
890 903
904static struct pnfs_layoutdriver_type scsilayout_type = {
905 .id = LAYOUT_SCSI,
906 .name = "LAYOUT_SCSI",
907 .owner = THIS_MODULE,
908 .flags = PNFS_LAYOUTRET_ON_SETATTR |
909 PNFS_READ_WHOLE_PAGE,
910 .read_pagelist = bl_read_pagelist,
911 .write_pagelist = bl_write_pagelist,
912 .alloc_layout_hdr = sl_alloc_layout_hdr,
913 .free_layout_hdr = bl_free_layout_hdr,
914 .alloc_lseg = bl_alloc_lseg,
915 .free_lseg = bl_free_lseg,
916 .return_range = bl_return_range,
917 .prepare_layoutcommit = bl_prepare_layoutcommit,
918 .cleanup_layoutcommit = bl_cleanup_layoutcommit,
919 .set_layoutdriver = bl_set_layoutdriver,
920 .alloc_deviceid_node = bl_alloc_deviceid_node,
921 .free_deviceid_node = bl_free_deviceid_node,
922 .pg_read_ops = &bl_pg_read_ops,
923 .pg_write_ops = &bl_pg_write_ops,
924 .sync = pnfs_generic_sync,
925};
926
927
891static int __init nfs4blocklayout_init(void) 928static int __init nfs4blocklayout_init(void)
892{ 929{
893 int ret; 930 int ret;
894 931
895 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 932 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
896 933
897 ret = pnfs_register_layoutdriver(&blocklayout_type); 934 ret = bl_init_pipefs();
898 if (ret) 935 if (ret)
899 goto out; 936 goto out;
900 ret = bl_init_pipefs(); 937
938 ret = pnfs_register_layoutdriver(&blocklayout_type);
901 if (ret) 939 if (ret)
902 goto out_unregister; 940 goto out_cleanup_pipe;
941
942 ret = pnfs_register_layoutdriver(&scsilayout_type);
943 if (ret)
944 goto out_unregister_block;
903 return 0; 945 return 0;
904 946
905out_unregister: 947out_unregister_block:
906 pnfs_unregister_layoutdriver(&blocklayout_type); 948 pnfs_unregister_layoutdriver(&blocklayout_type);
949out_cleanup_pipe:
950 bl_cleanup_pipefs();
907out: 951out:
908 return ret; 952 return ret;
909} 953}
@@ -913,8 +957,9 @@ static void __exit nfs4blocklayout_exit(void)
913 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 957 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
914 __func__); 958 __func__);
915 959
916 bl_cleanup_pipefs(); 960 pnfs_unregister_layoutdriver(&scsilayout_type);
917 pnfs_unregister_layoutdriver(&blocklayout_type); 961 pnfs_unregister_layoutdriver(&blocklayout_type);
962 bl_cleanup_pipefs();
918} 963}
919 964
920MODULE_ALIAS("nfs-layouttype4-3"); 965MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c556640dcf3b..bc21205309e0 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
55 */ 55 */
56#define PNFS_BLOCK_UUID_LEN 128 56#define PNFS_BLOCK_UUID_LEN 128
57 57
58
59struct pnfs_block_volume { 58struct pnfs_block_volume {
60 enum pnfs_block_volume_type type; 59 enum pnfs_block_volume_type type;
61 union { 60 union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
82 u32 volumes_count; 81 u32 volumes_count;
83 u32 volumes[PNFS_BLOCK_MAX_DEVICES]; 82 u32 volumes[PNFS_BLOCK_MAX_DEVICES];
84 } stripe; 83 } stripe;
84 struct {
85 enum scsi_code_set code_set;
86 enum scsi_designator_type designator_type;
87 int designator_len;
88 u8 designator[256];
89 u64 pr_key;
90 } scsi;
85 }; 91 };
86}; 92};
87 93
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
106 struct block_device *bdev; 112 struct block_device *bdev;
107 u64 disk_offset; 113 u64 disk_offset;
108 114
115 u64 pr_key;
116 bool pr_registered;
117
109 bool (*map)(struct pnfs_block_dev *dev, u64 offset, 118 bool (*map)(struct pnfs_block_dev *dev, u64 offset,
110 struct pnfs_block_dev_map *map); 119 struct pnfs_block_dev_map *map);
111}; 120};
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
131 struct rb_root bl_ext_rw; 140 struct rb_root bl_ext_rw;
132 struct rb_root bl_ext_ro; 141 struct rb_root bl_ext_ro;
133 spinlock_t bl_ext_lock; /* Protects list manipulation */ 142 spinlock_t bl_ext_lock; /* Protects list manipulation */
143 bool bl_scsi_layout;
134}; 144};
135 145
136static inline struct pnfs_block_layout * 146static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
182dev_t bl_resolve_deviceid(struct nfs_server *server, 192dev_t bl_resolve_deviceid(struct nfs_server *server,
183 struct pnfs_block_volume *b, gfp_t gfp_mask); 193 struct pnfs_block_volume *b, gfp_t gfp_mask);
184int __init bl_init_pipefs(void); 194int __init bl_init_pipefs(void);
185void __exit bl_cleanup_pipefs(void); 195void bl_cleanup_pipefs(void);
186 196
187#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ 197#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbdfe577..e5b89675263e 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * Copyright (c) 2014 Christoph Hellwig. 2 * Copyright (c) 2014-2016 Christoph Hellwig.
3 */ 3 */
4#include <linux/sunrpc/svc.h> 4#include <linux/sunrpc/svc.h>
5#include <linux/blkdev.h> 5#include <linux/blkdev.h>
6#include <linux/nfs4.h> 6#include <linux/nfs4.h>
7#include <linux/nfs_fs.h> 7#include <linux/nfs_fs.h>
8#include <linux/nfs_xdr.h> 8#include <linux/nfs_xdr.h>
9#include <linux/pr.h>
9 10
10#include "blocklayout.h" 11#include "blocklayout.h"
11 12
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
21 bl_free_device(&dev->children[i]); 22 bl_free_device(&dev->children[i]);
22 kfree(dev->children); 23 kfree(dev->children);
23 } else { 24 } else {
25 if (dev->pr_registered) {
26 const struct pr_ops *ops =
27 dev->bdev->bd_disk->fops->pr_ops;
28 int error;
29
30 error = ops->pr_register(dev->bdev, dev->pr_key, 0,
31 false);
32 if (error)
33 pr_err("failed to unregister PR key.\n");
34 }
35
24 if (dev->bdev) 36 if (dev->bdev)
25 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); 37 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
26 } 38 }
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
113 for (i = 0; i < b->stripe.volumes_count; i++) 125 for (i = 0; i < b->stripe.volumes_count; i++)
114 b->stripe.volumes[i] = be32_to_cpup(p++); 126 b->stripe.volumes[i] = be32_to_cpup(p++);
115 break; 127 break;
128 case PNFS_BLOCK_VOLUME_SCSI:
129 p = xdr_inline_decode(xdr, 4 + 4 + 4);
130 if (!p)
131 return -EIO;
132 b->scsi.code_set = be32_to_cpup(p++);
133 b->scsi.designator_type = be32_to_cpup(p++);
134 b->scsi.designator_len = be32_to_cpup(p++);
135 p = xdr_inline_decode(xdr, b->scsi.designator_len);
136 if (!p)
137 return -EIO;
138 if (b->scsi.designator_len > 256)
139 return -EIO;
140 memcpy(&b->scsi.designator, p, b->scsi.designator_len);
141 p = xdr_inline_decode(xdr, 8);
142 if (!p)
143 return -EIO;
144 p = xdr_decode_hyper(p, &b->scsi.pr_key);
145 break;
116 default: 146 default:
117 dprintk("unknown volume type!\n"); 147 dprintk("unknown volume type!\n");
118 return -EIO; 148 return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
216 return 0; 246 return 0;
217} 247}
218 248
249static bool
250bl_validate_designator(struct pnfs_block_volume *v)
251{
252 switch (v->scsi.designator_type) {
253 case PS_DESIGNATOR_EUI64:
254 if (v->scsi.code_set != PS_CODE_SET_BINARY)
255 return false;
256
257 if (v->scsi.designator_len != 8 &&
258 v->scsi.designator_len != 10 &&
259 v->scsi.designator_len != 16)
260 return false;
261
262 return true;
263 case PS_DESIGNATOR_NAA:
264 if (v->scsi.code_set != PS_CODE_SET_BINARY)
265 return false;
266
267 if (v->scsi.designator_len != 8 &&
268 v->scsi.designator_len != 16)
269 return false;
270
271 return true;
272 case PS_DESIGNATOR_T10:
273 case PS_DESIGNATOR_NAME:
274 pr_err("pNFS: unsupported designator "
275 "(code set %d, type %d, len %d.\n",
276 v->scsi.code_set,
277 v->scsi.designator_type,
278 v->scsi.designator_len);
279 return false;
280 default:
281 pr_err("pNFS: invalid designator "
282 "(code set %d, type %d, len %d.\n",
283 v->scsi.code_set,
284 v->scsi.designator_type,
285 v->scsi.designator_len);
286 return false;
287 }
288}
289
290static int
291bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
292 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
293{
294 struct pnfs_block_volume *v = &volumes[idx];
295 const struct pr_ops *ops;
296 const char *devname;
297 int error;
298
299 if (!bl_validate_designator(v))
300 return -EINVAL;
301
302 switch (v->scsi.designator_len) {
303 case 8:
304 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
305 v->scsi.designator);
306 break;
307 case 12:
308 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
309 v->scsi.designator);
310 break;
311 case 16:
312 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
313 v->scsi.designator);
314 break;
315 default:
316 return -EINVAL;
317 }
318
319 d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
320 if (IS_ERR(d->bdev)) {
321 pr_warn("pNFS: failed to open device %s (%ld)\n",
322 devname, PTR_ERR(d->bdev));
323 kfree(devname);
324 return PTR_ERR(d->bdev);
325 }
326
327 kfree(devname);
328
329 d->len = i_size_read(d->bdev->bd_inode);
330 d->map = bl_map_simple;
331 d->pr_key = v->scsi.pr_key;
332
333 pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
334 d->bdev->bd_disk->disk_name, d->pr_key);
335
336 ops = d->bdev->bd_disk->fops->pr_ops;
337 if (!ops) {
338 pr_err("pNFS: block device %s does not support reservations.",
339 d->bdev->bd_disk->disk_name);
340 error = -EINVAL;
341 goto out_blkdev_put;
342 }
343
344 error = ops->pr_register(d->bdev, 0, d->pr_key, true);
345 if (error) {
346 pr_err("pNFS: failed to register key for block device %s.",
347 d->bdev->bd_disk->disk_name);
348 goto out_blkdev_put;
349 }
350
351 d->pr_registered = true;
352 return 0;
353
354out_blkdev_put:
355 blkdev_put(d->bdev, FMODE_READ);
356 return error;
357}
358
219static int 359static int
220bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 360bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
221 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 361 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
303 return bl_parse_concat(server, d, volumes, idx, gfp_mask); 443 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
304 case PNFS_BLOCK_VOLUME_STRIPE: 444 case PNFS_BLOCK_VOLUME_STRIPE:
305 return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 445 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
446 case PNFS_BLOCK_VOLUME_SCSI:
447 return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
306 default: 448 default:
307 dprintk("unsupported volume type: %d\n", volumes[idx].type); 449 dprintk("unsupported volume type: %d\n", volumes[idx].type);
308 return -EIO; 450 return -EIO;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c37f3d..df366fc68095 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Christoph Hellwig. 2 * Copyright (c) 2014-2016 Christoph Hellwig.
3 */ 3 */
4 4
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
462 return err; 462 return err;
463} 463}
464 464
465static size_t ext_tree_layoutupdate_size(size_t count) 465static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
466{ 466{
467 return sizeof(__be32) /* number of entries */ + 467 if (bl->bl_scsi_layout)
468 PNFS_BLOCK_EXTENT_SIZE * count; 468 return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
469 else
470 return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
469} 471}
470 472
471static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, 473static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -482,6 +484,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
482 } 484 }
483} 485}
484 486
487static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
488{
489 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
490 NFS4_DEVICEID4_SIZE);
491 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
492 p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
493 p = xdr_encode_hyper(p, 0LL);
494 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
495 return p;
496}
497
498static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
499{
500 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
501 return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
502}
503
485static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, 504static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
486 size_t buffer_size, size_t *count) 505 size_t buffer_size, size_t *count)
487{ 506{
@@ -495,19 +514,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
495 continue; 514 continue;
496 515
497 (*count)++; 516 (*count)++;
498 if (ext_tree_layoutupdate_size(*count) > buffer_size) { 517 if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
499 /* keep counting.. */ 518 /* keep counting.. */
500 ret = -ENOSPC; 519 ret = -ENOSPC;
501 continue; 520 continue;
502 } 521 }
503 522
504 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, 523 if (bl->bl_scsi_layout)
505 NFS4_DEVICEID4_SIZE); 524 p = encode_scsi_range(be, p);
506 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); 525 else
507 p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); 526 p = encode_block_extent(be, p);
508 p = xdr_encode_hyper(p, 0LL);
509 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
510
511 be->be_tag = EXTENT_COMMITTING; 527 be->be_tag = EXTENT_COMMITTING;
512 } 528 }
513 spin_unlock(&bl->bl_ext_lock); 529 spin_unlock(&bl->bl_ext_lock);
@@ -536,7 +552,7 @@ retry:
536 if (unlikely(ret)) { 552 if (unlikely(ret)) {
537 ext_tree_free_commitdata(arg, buffer_size); 553 ext_tree_free_commitdata(arg, buffer_size);
538 554
539 buffer_size = ext_tree_layoutupdate_size(count); 555 buffer_size = ext_tree_layoutupdate_size(bl, count);
540 count = 0; 556 count = 0;
541 557
542 arg->layoutupdate_pages = 558 arg->layoutupdate_pages =
@@ -555,7 +571,7 @@ retry:
555 } 571 }
556 572
557 *start_p = cpu_to_be32(count); 573 *start_p = cpu_to_be32(count);
558 arg->layoutupdate_len = ext_tree_layoutupdate_size(count); 574 arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
559 575
560 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { 576 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
561 void *p = start_p, *end = p + arg->layoutupdate_len; 577 void *p = start_p, *end = p + arg->layoutupdate_len;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index dbe5839cdeba..9fb067a6f7e0 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -281,7 +281,7 @@ out:
281 return ret; 281 return ret;
282} 282}
283 283
284void __exit bl_cleanup_pipefs(void) 284void bl_cleanup_pipefs(void)
285{ 285{
286 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 286 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
287 unregister_pernet_subsys(&nfs4blocklayout_net_ops); 287 unregister_pernet_subsys(&nfs4blocklayout_net_ops);