aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c72
-rw-r--r--fs/nfs/blocklayout/blocklayout.h14
-rw-r--r--fs/nfs/blocklayout/dev.c144
-rw-r--r--fs/nfs/blocklayout/extent_tree.c44
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
-rw-r--r--fs/nfs/callback.h3
-rw-r--r--fs/nfs/callback_proc.c69
-rw-r--r--fs/nfs/callback_xdr.c12
-rw-r--r--fs/nfs/dir.c12
-rw-r--r--fs/nfs/file.c12
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/nfs4file.c33
-rw-r--r--fs/nfs/nfs4proc.c76
-rw-r--r--fs/nfs/nfs4session.c54
-rw-r--r--fs/nfs/nfs4session.h8
-rw-r--r--fs/nfs/pnfs_nfs.c16
18 files changed, 436 insertions, 145 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138f410c..02e4d87d2ed3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
446 kfree(bl); 446 kfree(bl);
447} 447}
448 448
449static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 449static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
450 gfp_t gfp_flags) 450 gfp_t gfp_flags, bool is_scsi_layout)
451{ 451{
452 struct pnfs_block_layout *bl; 452 struct pnfs_block_layout *bl;
453 453
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
460 bl->bl_ext_ro = RB_ROOT; 460 bl->bl_ext_ro = RB_ROOT;
461 spin_lock_init(&bl->bl_ext_lock); 461 spin_lock_init(&bl->bl_ext_lock);
462 462
463 bl->bl_scsi_layout = is_scsi_layout;
463 return &bl->bl_layout; 464 return &bl->bl_layout;
464} 465}
465 466
467static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
468 gfp_t gfp_flags)
469{
470 return __bl_alloc_layout_hdr(inode, gfp_flags, false);
471}
472
473static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
474 gfp_t gfp_flags)
475{
476 return __bl_alloc_layout_hdr(inode, gfp_flags, true);
477}
478
466static void bl_free_lseg(struct pnfs_layout_segment *lseg) 479static void bl_free_lseg(struct pnfs_layout_segment *lseg)
467{ 480{
468 dprintk("%s enter\n", __func__); 481 dprintk("%s enter\n", __func__);
@@ -743,7 +756,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
743 756
744static bool 757static bool
745is_aligned_req(struct nfs_pageio_descriptor *pgio, 758is_aligned_req(struct nfs_pageio_descriptor *pgio,
746 struct nfs_page *req, unsigned int alignment) 759 struct nfs_page *req, unsigned int alignment, bool is_write)
747{ 760{
748 /* 761 /*
749 * Always accept buffered writes, higher layers take care of the 762 * Always accept buffered writes, higher layers take care of the
@@ -758,7 +771,8 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
758 if (IS_ALIGNED(req->wb_bytes, alignment)) 771 if (IS_ALIGNED(req->wb_bytes, alignment))
759 return true; 772 return true;
760 773
761 if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) { 774 if (is_write &&
775 (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
762 /* 776 /*
763 * If the write goes up to the inode size, just write 777 * If the write goes up to the inode size, just write
764 * the full page. Data past the inode size is 778 * the full page. Data past the inode size is
@@ -775,7 +789,7 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
775static void 789static void
776bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 790bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
777{ 791{
778 if (!is_aligned_req(pgio, req, SECTOR_SIZE)) { 792 if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
779 nfs_pageio_reset_read_mds(pgio); 793 nfs_pageio_reset_read_mds(pgio);
780 return; 794 return;
781 } 795 }
@@ -791,7 +805,7 @@ static size_t
791bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 805bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
792 struct nfs_page *req) 806 struct nfs_page *req)
793{ 807{
794 if (!is_aligned_req(pgio, req, SECTOR_SIZE)) 808 if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
795 return 0; 809 return 0;
796 return pnfs_generic_pg_test(pgio, prev, req); 810 return pnfs_generic_pg_test(pgio, prev, req);
797} 811}
@@ -824,7 +838,7 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
824{ 838{
825 u64 wb_size; 839 u64 wb_size;
826 840
827 if (!is_aligned_req(pgio, req, PAGE_SIZE)) { 841 if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
828 nfs_pageio_reset_write_mds(pgio); 842 nfs_pageio_reset_write_mds(pgio);
829 return; 843 return;
830 } 844 }
@@ -846,7 +860,7 @@ static size_t
846bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 860bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
847 struct nfs_page *req) 861 struct nfs_page *req)
848{ 862{
849 if (!is_aligned_req(pgio, req, PAGE_SIZE)) 863 if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
850 return 0; 864 return 0;
851 return pnfs_generic_pg_test(pgio, prev, req); 865 return pnfs_generic_pg_test(pgio, prev, req);
852} 866}
@@ -888,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
888 .sync = pnfs_generic_sync, 902 .sync = pnfs_generic_sync,
889}; 903};
890 904
905static struct pnfs_layoutdriver_type scsilayout_type = {
906 .id = LAYOUT_SCSI,
907 .name = "LAYOUT_SCSI",
908 .owner = THIS_MODULE,
909 .flags = PNFS_LAYOUTRET_ON_SETATTR |
910 PNFS_READ_WHOLE_PAGE,
911 .read_pagelist = bl_read_pagelist,
912 .write_pagelist = bl_write_pagelist,
913 .alloc_layout_hdr = sl_alloc_layout_hdr,
914 .free_layout_hdr = bl_free_layout_hdr,
915 .alloc_lseg = bl_alloc_lseg,
916 .free_lseg = bl_free_lseg,
917 .return_range = bl_return_range,
918 .prepare_layoutcommit = bl_prepare_layoutcommit,
919 .cleanup_layoutcommit = bl_cleanup_layoutcommit,
920 .set_layoutdriver = bl_set_layoutdriver,
921 .alloc_deviceid_node = bl_alloc_deviceid_node,
922 .free_deviceid_node = bl_free_deviceid_node,
923 .pg_read_ops = &bl_pg_read_ops,
924 .pg_write_ops = &bl_pg_write_ops,
925 .sync = pnfs_generic_sync,
926};
927
928
891static int __init nfs4blocklayout_init(void) 929static int __init nfs4blocklayout_init(void)
892{ 930{
893 int ret; 931 int ret;
894 932
895 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 933 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
896 934
897 ret = pnfs_register_layoutdriver(&blocklayout_type); 935 ret = bl_init_pipefs();
898 if (ret) 936 if (ret)
899 goto out; 937 goto out;
900 ret = bl_init_pipefs(); 938
939 ret = pnfs_register_layoutdriver(&blocklayout_type);
901 if (ret) 940 if (ret)
902 goto out_unregister; 941 goto out_cleanup_pipe;
942
943 ret = pnfs_register_layoutdriver(&scsilayout_type);
944 if (ret)
945 goto out_unregister_block;
903 return 0; 946 return 0;
904 947
905out_unregister: 948out_unregister_block:
906 pnfs_unregister_layoutdriver(&blocklayout_type); 949 pnfs_unregister_layoutdriver(&blocklayout_type);
950out_cleanup_pipe:
951 bl_cleanup_pipefs();
907out: 952out:
908 return ret; 953 return ret;
909} 954}
@@ -913,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
913 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 958 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
914 __func__); 959 __func__);
915 960
916 bl_cleanup_pipefs(); 961 pnfs_unregister_layoutdriver(&scsilayout_type);
917 pnfs_unregister_layoutdriver(&blocklayout_type); 962 pnfs_unregister_layoutdriver(&blocklayout_type);
963 bl_cleanup_pipefs();
918} 964}
919 965
920MODULE_ALIAS("nfs-layouttype4-3"); 966MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c556640dcf3b..bc21205309e0 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
55 */ 55 */
56#define PNFS_BLOCK_UUID_LEN 128 56#define PNFS_BLOCK_UUID_LEN 128
57 57
58
59struct pnfs_block_volume { 58struct pnfs_block_volume {
60 enum pnfs_block_volume_type type; 59 enum pnfs_block_volume_type type;
61 union { 60 union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
82 u32 volumes_count; 81 u32 volumes_count;
83 u32 volumes[PNFS_BLOCK_MAX_DEVICES]; 82 u32 volumes[PNFS_BLOCK_MAX_DEVICES];
84 } stripe; 83 } stripe;
84 struct {
85 enum scsi_code_set code_set;
86 enum scsi_designator_type designator_type;
87 int designator_len;
88 u8 designator[256];
89 u64 pr_key;
90 } scsi;
85 }; 91 };
86}; 92};
87 93
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
106 struct block_device *bdev; 112 struct block_device *bdev;
107 u64 disk_offset; 113 u64 disk_offset;
108 114
115 u64 pr_key;
116 bool pr_registered;
117
109 bool (*map)(struct pnfs_block_dev *dev, u64 offset, 118 bool (*map)(struct pnfs_block_dev *dev, u64 offset,
110 struct pnfs_block_dev_map *map); 119 struct pnfs_block_dev_map *map);
111}; 120};
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
131 struct rb_root bl_ext_rw; 140 struct rb_root bl_ext_rw;
132 struct rb_root bl_ext_ro; 141 struct rb_root bl_ext_ro;
133 spinlock_t bl_ext_lock; /* Protects list manipulation */ 142 spinlock_t bl_ext_lock; /* Protects list manipulation */
143 bool bl_scsi_layout;
134}; 144};
135 145
136static inline struct pnfs_block_layout * 146static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
182dev_t bl_resolve_deviceid(struct nfs_server *server, 192dev_t bl_resolve_deviceid(struct nfs_server *server,
183 struct pnfs_block_volume *b, gfp_t gfp_mask); 193 struct pnfs_block_volume *b, gfp_t gfp_mask);
184int __init bl_init_pipefs(void); 194int __init bl_init_pipefs(void);
185void __exit bl_cleanup_pipefs(void); 195void bl_cleanup_pipefs(void);
186 196
187#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ 197#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbdfe577..e5b89675263e 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * Copyright (c) 2014 Christoph Hellwig. 2 * Copyright (c) 2014-2016 Christoph Hellwig.
3 */ 3 */
4#include <linux/sunrpc/svc.h> 4#include <linux/sunrpc/svc.h>
5#include <linux/blkdev.h> 5#include <linux/blkdev.h>
6#include <linux/nfs4.h> 6#include <linux/nfs4.h>
7#include <linux/nfs_fs.h> 7#include <linux/nfs_fs.h>
8#include <linux/nfs_xdr.h> 8#include <linux/nfs_xdr.h>
9#include <linux/pr.h>
9 10
10#include "blocklayout.h" 11#include "blocklayout.h"
11 12
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
21 bl_free_device(&dev->children[i]); 22 bl_free_device(&dev->children[i]);
22 kfree(dev->children); 23 kfree(dev->children);
23 } else { 24 } else {
25 if (dev->pr_registered) {
26 const struct pr_ops *ops =
27 dev->bdev->bd_disk->fops->pr_ops;
28 int error;
29
30 error = ops->pr_register(dev->bdev, dev->pr_key, 0,
31 false);
32 if (error)
33 pr_err("failed to unregister PR key.\n");
34 }
35
24 if (dev->bdev) 36 if (dev->bdev)
25 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); 37 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
26 } 38 }
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
113 for (i = 0; i < b->stripe.volumes_count; i++) 125 for (i = 0; i < b->stripe.volumes_count; i++)
114 b->stripe.volumes[i] = be32_to_cpup(p++); 126 b->stripe.volumes[i] = be32_to_cpup(p++);
115 break; 127 break;
128 case PNFS_BLOCK_VOLUME_SCSI:
129 p = xdr_inline_decode(xdr, 4 + 4 + 4);
130 if (!p)
131 return -EIO;
132 b->scsi.code_set = be32_to_cpup(p++);
133 b->scsi.designator_type = be32_to_cpup(p++);
134 b->scsi.designator_len = be32_to_cpup(p++);
135 p = xdr_inline_decode(xdr, b->scsi.designator_len);
136 if (!p)
137 return -EIO;
138 if (b->scsi.designator_len > 256)
139 return -EIO;
140 memcpy(&b->scsi.designator, p, b->scsi.designator_len);
141 p = xdr_inline_decode(xdr, 8);
142 if (!p)
143 return -EIO;
144 p = xdr_decode_hyper(p, &b->scsi.pr_key);
145 break;
116 default: 146 default:
117 dprintk("unknown volume type!\n"); 147 dprintk("unknown volume type!\n");
118 return -EIO; 148 return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
216 return 0; 246 return 0;
217} 247}
218 248
249static bool
250bl_validate_designator(struct pnfs_block_volume *v)
251{
252 switch (v->scsi.designator_type) {
253 case PS_DESIGNATOR_EUI64:
254 if (v->scsi.code_set != PS_CODE_SET_BINARY)
255 return false;
256
257 if (v->scsi.designator_len != 8 &&
258 v->scsi.designator_len != 10 &&
259 v->scsi.designator_len != 16)
260 return false;
261
262 return true;
263 case PS_DESIGNATOR_NAA:
264 if (v->scsi.code_set != PS_CODE_SET_BINARY)
265 return false;
266
267 if (v->scsi.designator_len != 8 &&
268 v->scsi.designator_len != 16)
269 return false;
270
271 return true;
272 case PS_DESIGNATOR_T10:
273 case PS_DESIGNATOR_NAME:
274 pr_err("pNFS: unsupported designator "
275 "(code set %d, type %d, len %d.\n",
276 v->scsi.code_set,
277 v->scsi.designator_type,
278 v->scsi.designator_len);
279 return false;
280 default:
281 pr_err("pNFS: invalid designator "
282 "(code set %d, type %d, len %d.\n",
283 v->scsi.code_set,
284 v->scsi.designator_type,
285 v->scsi.designator_len);
286 return false;
287 }
288}
289
290static int
291bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
292 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
293{
294 struct pnfs_block_volume *v = &volumes[idx];
295 const struct pr_ops *ops;
296 const char *devname;
297 int error;
298
299 if (!bl_validate_designator(v))
300 return -EINVAL;
301
302 switch (v->scsi.designator_len) {
303 case 8:
304 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
305 v->scsi.designator);
306 break;
307 case 12:
308 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
309 v->scsi.designator);
310 break;
311 case 16:
312 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
313 v->scsi.designator);
314 break;
315 default:
316 return -EINVAL;
317 }
318
319 d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
320 if (IS_ERR(d->bdev)) {
321 pr_warn("pNFS: failed to open device %s (%ld)\n",
322 devname, PTR_ERR(d->bdev));
323 kfree(devname);
324 return PTR_ERR(d->bdev);
325 }
326
327 kfree(devname);
328
329 d->len = i_size_read(d->bdev->bd_inode);
330 d->map = bl_map_simple;
331 d->pr_key = v->scsi.pr_key;
332
333 pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
334 d->bdev->bd_disk->disk_name, d->pr_key);
335
336 ops = d->bdev->bd_disk->fops->pr_ops;
337 if (!ops) {
338 pr_err("pNFS: block device %s does not support reservations.",
339 d->bdev->bd_disk->disk_name);
340 error = -EINVAL;
341 goto out_blkdev_put;
342 }
343
344 error = ops->pr_register(d->bdev, 0, d->pr_key, true);
345 if (error) {
346 pr_err("pNFS: failed to register key for block device %s.",
347 d->bdev->bd_disk->disk_name);
348 goto out_blkdev_put;
349 }
350
351 d->pr_registered = true;
352 return 0;
353
354out_blkdev_put:
355 blkdev_put(d->bdev, FMODE_READ);
356 return error;
357}
358
219static int 359static int
220bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 360bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
221 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 361 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
303 return bl_parse_concat(server, d, volumes, idx, gfp_mask); 443 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
304 case PNFS_BLOCK_VOLUME_STRIPE: 444 case PNFS_BLOCK_VOLUME_STRIPE:
305 return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 445 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
446 case PNFS_BLOCK_VOLUME_SCSI:
447 return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
306 default: 448 default:
307 dprintk("unsupported volume type: %d\n", volumes[idx].type); 449 dprintk("unsupported volume type: %d\n", volumes[idx].type);
308 return -EIO; 450 return -EIO;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 35ab51c04814..720b3ff55fa9 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Christoph Hellwig. 2 * Copyright (c) 2014-2016 Christoph Hellwig.
3 */ 3 */
4 4
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
462 return err; 462 return err;
463} 463}
464 464
465static size_t ext_tree_layoutupdate_size(size_t count) 465static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
466{ 466{
467 return sizeof(__be32) /* number of entries */ + 467 if (bl->bl_scsi_layout)
468 PNFS_BLOCK_EXTENT_SIZE * count; 468 return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
469 else
470 return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
469} 471}
470 472
471static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, 473static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
483 } 485 }
484} 486}
485 487
488static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
489{
490 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
491 NFS4_DEVICEID4_SIZE);
492 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
493 p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
494 p = xdr_encode_hyper(p, 0LL);
495 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
496 return p;
497}
498
499static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
500{
501 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
502 return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
503}
504
486static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, 505static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
487 size_t buffer_size, size_t *count) 506 size_t buffer_size, size_t *count)
488{ 507{
@@ -496,19 +515,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
496 continue; 515 continue;
497 516
498 (*count)++; 517 (*count)++;
499 if (ext_tree_layoutupdate_size(*count) > buffer_size) { 518 if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
500 /* keep counting.. */ 519 /* keep counting.. */
501 ret = -ENOSPC; 520 ret = -ENOSPC;
502 continue; 521 continue;
503 } 522 }
504 523
505 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, 524 if (bl->bl_scsi_layout)
506 NFS4_DEVICEID4_SIZE); 525 p = encode_scsi_range(be, p);
507 p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); 526 else
508 p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); 527 p = encode_block_extent(be, p);
509 p = xdr_encode_hyper(p, 0LL);
510 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
511
512 be->be_tag = EXTENT_COMMITTING; 528 be->be_tag = EXTENT_COMMITTING;
513 } 529 }
514 spin_unlock(&bl->bl_ext_lock); 530 spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@ retry:
537 if (unlikely(ret)) { 553 if (unlikely(ret)) {
538 ext_tree_free_commitdata(arg, buffer_size); 554 ext_tree_free_commitdata(arg, buffer_size);
539 555
540 buffer_size = ext_tree_layoutupdate_size(count); 556 buffer_size = ext_tree_layoutupdate_size(bl, count);
541 count = 0; 557 count = 0;
542 558
543 arg->layoutupdate_pages = 559 arg->layoutupdate_pages =
@@ -556,7 +572,7 @@ retry:
556 } 572 }
557 573
558 *start_p = cpu_to_be32(count); 574 *start_p = cpu_to_be32(count);
559 arg->layoutupdate_len = ext_tree_layoutupdate_size(count); 575 arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
560 576
561 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { 577 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
562 void *p = start_p, *end = p + arg->layoutupdate_len; 578 void *p = start_p, *end = p + arg->layoutupdate_len;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index dbe5839cdeba..9fb067a6f7e0 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -281,7 +281,7 @@ out:
281 return ret; 281 return ret;
282} 282}
283 283
284void __exit bl_cleanup_pipefs(void) 284void bl_cleanup_pipefs(void)
285{ 285{
286 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 286 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
287 unregister_pernet_subsys(&nfs4blocklayout_net_ops); 287 unregister_pernet_subsys(&nfs4blocklayout_net_ops);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ff8195bd75ea..5fe1cecbf9f0 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -37,10 +37,11 @@ enum nfs4_callback_opnum {
37 OP_CB_ILLEGAL = 10044, 37 OP_CB_ILLEGAL = 10044,
38}; 38};
39 39
40struct nfs4_slot;
40struct cb_process_state { 41struct cb_process_state {
41 __be32 drc_status; 42 __be32 drc_status;
42 struct nfs_client *clp; 43 struct nfs_client *clp;
43 u32 slotid; 44 struct nfs4_slot *slot;
44 u32 minorversion; 45 u32 minorversion;
45 struct net *net; 46 struct net *net;
46}; 47};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f0939d097406..618ced381a14 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -354,47 +354,38 @@ out:
354 * a single outstanding callback request at a time. 354 * a single outstanding callback request at a time.
355 */ 355 */
356static __be32 356static __be32
357validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) 357validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
358 const struct cb_sequenceargs * args)
358{ 359{
359 struct nfs4_slot *slot; 360 dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
360 361 __func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
361 dprintk("%s enter. slotid %u seqid %u\n",
362 __func__, args->csa_slotid, args->csa_sequenceid);
363 362
364 if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) 363 if (args->csa_slotid > tbl->server_highest_slotid)
365 return htonl(NFS4ERR_BADSLOT); 364 return htonl(NFS4ERR_BADSLOT);
366 365
367 slot = tbl->slots + args->csa_slotid;
368 dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
369
370 /* Normal */
371 if (likely(args->csa_sequenceid == slot->seq_nr + 1))
372 goto out_ok;
373
374 /* Replay */ 366 /* Replay */
375 if (args->csa_sequenceid == slot->seq_nr) { 367 if (args->csa_sequenceid == slot->seq_nr) {
376 dprintk("%s seqid %u is a replay\n", 368 dprintk("%s seqid %u is a replay\n",
377 __func__, args->csa_sequenceid); 369 __func__, args->csa_sequenceid);
370 if (nfs4_test_locked_slot(tbl, slot->slot_nr))
371 return htonl(NFS4ERR_DELAY);
378 /* Signal process_op to set this error on next op */ 372 /* Signal process_op to set this error on next op */
379 if (args->csa_cachethis == 0) 373 if (args->csa_cachethis == 0)
380 return htonl(NFS4ERR_RETRY_UNCACHED_REP); 374 return htonl(NFS4ERR_RETRY_UNCACHED_REP);
381 375
382 /* The ca_maxresponsesize_cached is 0 with no DRC */ 376 /* Liar! We never allowed you to set csa_cachethis != 0 */
383 else if (args->csa_cachethis == 1) 377 return htonl(NFS4ERR_SEQ_FALSE_RETRY);
384 return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
385 } 378 }
386 379
387 /* Wraparound */ 380 /* Wraparound */
388 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { 381 if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) {
389 slot->seq_nr = 1; 382 if (args->csa_sequenceid == 1)
390 goto out_ok; 383 return htonl(NFS4_OK);
391 } 384 } else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
385 return htonl(NFS4_OK);
392 386
393 /* Misordered request */ 387 /* Misordered request */
394 return htonl(NFS4ERR_SEQ_MISORDERED); 388 return htonl(NFS4ERR_SEQ_MISORDERED);
395out_ok:
396 tbl->highest_used_slotid = args->csa_slotid;
397 return htonl(NFS4_OK);
398} 389}
399 390
400/* 391/*
@@ -473,6 +464,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
473 tbl = &clp->cl_session->bc_slot_table; 464 tbl = &clp->cl_session->bc_slot_table;
474 slot = tbl->slots + args->csa_slotid; 465 slot = tbl->slots + args->csa_slotid;
475 466
467 /* Set up res before grabbing the spinlock */
468 memcpy(&res->csr_sessionid, &args->csa_sessionid,
469 sizeof(res->csr_sessionid));
470 res->csr_sequenceid = args->csa_sequenceid;
471 res->csr_slotid = args->csa_slotid;
472
476 spin_lock(&tbl->slot_tbl_lock); 473 spin_lock(&tbl->slot_tbl_lock);
477 /* state manager is resetting the session */ 474 /* state manager is resetting the session */
478 if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { 475 if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
@@ -485,18 +482,26 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
485 goto out_unlock; 482 goto out_unlock;
486 } 483 }
487 484
488 memcpy(&res->csr_sessionid, &args->csa_sessionid, 485 status = htonl(NFS4ERR_BADSLOT);
489 sizeof(res->csr_sessionid)); 486 slot = nfs4_lookup_slot(tbl, args->csa_slotid);
490 res->csr_sequenceid = args->csa_sequenceid; 487 if (IS_ERR(slot))
491 res->csr_slotid = args->csa_slotid; 488 goto out_unlock;
492 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 489
493 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 490 res->csr_highestslotid = tbl->server_highest_slotid;
491 res->csr_target_highestslotid = tbl->target_highest_slotid;
494 492
495 status = validate_seqid(tbl, args); 493 status = validate_seqid(tbl, slot, args);
496 if (status) 494 if (status)
497 goto out_unlock; 495 goto out_unlock;
496 if (!nfs4_try_to_lock_slot(tbl, slot)) {
497 status = htonl(NFS4ERR_DELAY);
498 goto out_unlock;
499 }
500 cps->slot = slot;
498 501
499 cps->slotid = args->csa_slotid; 502 /* The ca_maxresponsesize_cached is 0 with no DRC */
503 if (args->csa_cachethis != 0)
504 return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
500 505
501 /* 506 /*
502 * Check for pending referring calls. If a match is found, a 507 * Check for pending referring calls. If a match is found, a
@@ -513,7 +518,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
513 * If CB_SEQUENCE returns an error, then the state of the slot 518 * If CB_SEQUENCE returns an error, then the state of the slot
514 * (sequence ID, cached reply) MUST NOT change. 519 * (sequence ID, cached reply) MUST NOT change.
515 */ 520 */
516 slot->seq_nr++; 521 slot->seq_nr = args->csa_sequenceid;
517out_unlock: 522out_unlock:
518 spin_unlock(&tbl->slot_tbl_lock); 523 spin_unlock(&tbl->slot_tbl_lock);
519 524
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac73488..976c90608e56 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -752,7 +752,8 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
752 return htonl(NFS_OK); 752 return htonl(NFS_OK);
753} 753}
754 754
755static void nfs4_callback_free_slot(struct nfs4_session *session) 755static void nfs4_callback_free_slot(struct nfs4_session *session,
756 struct nfs4_slot *slot)
756{ 757{
757 struct nfs4_slot_table *tbl = &session->bc_slot_table; 758 struct nfs4_slot_table *tbl = &session->bc_slot_table;
758 759
@@ -761,15 +762,17 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
761 * Let the state manager know callback processing done. 762 * Let the state manager know callback processing done.
762 * A single slot, so highest used slotid is either 0 or -1 763 * A single slot, so highest used slotid is either 0 or -1
763 */ 764 */
764 tbl->highest_used_slotid = NFS4_NO_SLOT; 765 nfs4_free_slot(tbl, slot);
765 nfs4_slot_tbl_drain_complete(tbl); 766 nfs4_slot_tbl_drain_complete(tbl);
766 spin_unlock(&tbl->slot_tbl_lock); 767 spin_unlock(&tbl->slot_tbl_lock);
767} 768}
768 769
769static void nfs4_cb_free_slot(struct cb_process_state *cps) 770static void nfs4_cb_free_slot(struct cb_process_state *cps)
770{ 771{
771 if (cps->slotid != NFS4_NO_SLOT) 772 if (cps->slot) {
772 nfs4_callback_free_slot(cps->clp->cl_session); 773 nfs4_callback_free_slot(cps->clp->cl_session, cps->slot);
774 cps->slot = NULL;
775 }
773} 776}
774 777
775#else /* CONFIG_NFS_V4_1 */ 778#else /* CONFIG_NFS_V4_1 */
@@ -893,7 +896,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
893 struct cb_process_state cps = { 896 struct cb_process_state cps = {
894 .drc_status = 0, 897 .drc_status = 0,
895 .clp = NULL, 898 .clp = NULL,
896 .slotid = NFS4_NO_SLOT,
897 .net = SVC_NET(rqstp), 899 .net = SVC_NET(rqstp),
898 }; 900 };
899 unsigned int nops = 0; 901 unsigned int nops = 0;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9cce67043f92..4bfa7d8bcade 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1360,19 +1360,15 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
1360 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); 1360 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1361 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); 1361 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1362 1362
1363 res = ERR_PTR(-ENAMETOOLONG); 1363 if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1364 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 1364 return ERR_PTR(-ENAMETOOLONG);
1365 goto out;
1366 1365
1367 /* 1366 /*
1368 * If we're doing an exclusive create, optimize away the lookup 1367 * If we're doing an exclusive create, optimize away the lookup
1369 * but don't hash the dentry. 1368 * but don't hash the dentry.
1370 */ 1369 */
1371 if (nfs_is_exclusive_create(dir, flags)) { 1370 if (nfs_is_exclusive_create(dir, flags))
1372 d_instantiate(dentry, NULL); 1371 return NULL;
1373 res = NULL;
1374 goto out;
1375 }
1376 1372
1377 res = ERR_PTR(-ENOMEM); 1373 res = ERR_PTR(-ENOMEM);
1378 fhandle = nfs_alloc_fhandle(); 1374 fhandle = nfs_alloc_fhandle();
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 748bb813b8ec..89bf093d342a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
233 * nfs_file_write() that a write error occurred, and hence cause it to 233 * nfs_file_write() that a write error occurred, and hence cause it to
234 * fall back to doing a synchronous write. 234 * fall back to doing a synchronous write.
235 */ 235 */
236int 236static int
237nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) 237nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
238{ 238{
239 struct nfs_open_context *ctx = nfs_file_open_context(file); 239 struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -263,9 +263,8 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
263out: 263out:
264 return ret; 264 return ret;
265} 265}
266EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
267 266
268static int 267int
269nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) 268nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
270{ 269{
271 int ret; 270 int ret;
@@ -273,13 +272,15 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
273 272
274 trace_nfs_fsync_enter(inode); 273 trace_nfs_fsync_enter(inode);
275 274
276 nfs_inode_dio_wait(inode); 275 inode_dio_wait(inode);
277 do { 276 do {
278 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 277 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
279 if (ret != 0) 278 if (ret != 0)
280 break; 279 break;
281 inode_lock(inode); 280 inode_lock(inode);
282 ret = nfs_file_fsync_commit(file, start, end, datasync); 281 ret = nfs_file_fsync_commit(file, start, end, datasync);
282 if (!ret)
283 ret = pnfs_sync_inode(inode, !!datasync);
283 inode_unlock(inode); 284 inode_unlock(inode);
284 /* 285 /*
285 * If nfs_file_fsync_commit detected a server reboot, then 286 * If nfs_file_fsync_commit detected a server reboot, then
@@ -293,6 +294,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
293 trace_nfs_fsync_exit(inode, ret); 294 trace_nfs_fsync_exit(inode, ret);
294 return ret; 295 return ret;
295} 296}
297EXPORT_SYMBOL_GPL(nfs_file_fsync);
296 298
297/* 299/*
298 * Decide whether a read/modify/write cycle may be more efficient 300 * Decide whether a read/modify/write cycle may be more efficient
@@ -368,7 +370,7 @@ start:
368 /* 370 /*
369 * Wait for O_DIRECT to complete 371 * Wait for O_DIRECT to complete
370 */ 372 */
371 nfs_inode_dio_wait(mapping->host); 373 inode_dio_wait(mapping->host);
372 374
373 page = grab_cache_page_write_begin(mapping, index, flags); 375 page = grab_cache_page_write_begin(mapping, index, flags);
374 if (!page) 376 if (!page)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index eb370460ce20..add0e5a70bd6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -418,6 +418,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
418 pnfs_error_mark_layout_for_return(ino, lseg); 418 pnfs_error_mark_layout_for_return(ino, lseg);
419 } else 419 } else
420 pnfs_error_mark_layout_for_return(ino, lseg); 420 pnfs_error_mark_layout_for_return(ino, lseg);
421 ds = NULL;
422 goto out;
421 } 423 }
422out_update_creds: 424out_update_creds:
423 if (ff_layout_update_mirror_cred(mirror, ds)) 425 if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 86faecf8f328..33d18c411905 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -141,7 +141,7 @@ void nfs_evict_inode(struct inode *inode)
141 141
142int nfs_sync_inode(struct inode *inode) 142int nfs_sync_inode(struct inode *inode)
143{ 143{
144 nfs_inode_dio_wait(inode); 144 inode_dio_wait(inode);
145 return nfs_wb_all(inode); 145 return nfs_wb_all(inode);
146} 146}
147EXPORT_SYMBOL_GPL(nfs_sync_inode); 147EXPORT_SYMBOL_GPL(nfs_sync_inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9a547aa3ec8e..565f8135ae1f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -358,7 +358,7 @@ int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
358int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 358int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
359 359
360/* file.c */ 360/* file.c */
361int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); 361int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
362loff_t nfs_file_llseek(struct file *, loff_t, int); 362loff_t nfs_file_llseek(struct file *, loff_t, int);
363ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); 363ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
364ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, 364ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
@@ -515,10 +515,6 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
515/* direct.c */ 515/* direct.c */
516void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 516void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
517 struct nfs_direct_req *dreq); 517 struct nfs_direct_req *dreq);
518static inline void nfs_inode_dio_wait(struct inode *inode)
519{
520 inode_dio_wait(inode);
521}
522extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 518extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
523 519
524/* nfs4proc.c */ 520/* nfs4proc.c */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 57ca1c8039c1..22c35abbee9d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -128,37 +128,6 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
128 return vfs_fsync(file, 0); 128 return vfs_fsync(file, 0);
129} 129}
130 130
131static int
132nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
133{
134 int ret;
135 struct inode *inode = file_inode(file);
136
137 trace_nfs_fsync_enter(inode);
138
139 nfs_inode_dio_wait(inode);
140 do {
141 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
142 if (ret != 0)
143 break;
144 inode_lock(inode);
145 ret = nfs_file_fsync_commit(file, start, end, datasync);
146 if (!ret)
147 ret = pnfs_sync_inode(inode, !!datasync);
148 inode_unlock(inode);
149 /*
150 * If nfs_file_fsync_commit detected a server reboot, then
151 * resend all dirty pages that might have been covered by
152 * the NFS_CONTEXT_RESEND_WRITES flag
153 */
154 start = 0;
155 end = LLONG_MAX;
156 } while (ret == -EAGAIN);
157
158 trace_nfs_fsync_exit(inode, ret);
159 return ret;
160}
161
162#ifdef CONFIG_NFS_V4_2 131#ifdef CONFIG_NFS_V4_2
163static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) 132static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
164{ 133{
@@ -266,7 +235,7 @@ const struct file_operations nfs4_file_operations = {
266 .open = nfs4_file_open, 235 .open = nfs4_file_open,
267 .flush = nfs4_file_flush, 236 .flush = nfs4_file_flush,
268 .release = nfs_file_release, 237 .release = nfs_file_release,
269 .fsync = nfs4_file_fsync, 238 .fsync = nfs_file_fsync,
270 .lock = nfs_lock, 239 .lock = nfs_lock,
271 .flock = nfs_flock, 240 .flock = nfs_flock,
272 .splice_read = nfs_file_splice_read, 241 .splice_read = nfs_file_splice_read,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 14881594dd07..327b8c34d360 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2461 2461
2462 dentry = opendata->dentry; 2462 dentry = opendata->dentry;
2463 if (d_really_is_negative(dentry)) { 2463 if (d_really_is_negative(dentry)) {
2464 /* FIXME: Is this d_drop() ever needed? */ 2464 struct dentry *alias;
2465 d_drop(dentry); 2465 d_drop(dentry);
2466 dentry = d_add_unique(dentry, igrab(state->inode)); 2466 alias = d_exact_alias(dentry, state->inode);
2467 if (dentry == NULL) { 2467 if (!alias)
2468 dentry = opendata->dentry; 2468 alias = d_splice_alias(igrab(state->inode), dentry);
2469 } else { 2469 /* d_splice_alias() can't fail here - it's a non-directory */
2470 if (alias) {
2470 dput(ctx->dentry); 2471 dput(ctx->dentry);
2471 ctx->dentry = dentry; 2472 ctx->dentry = dentry = alias;
2472 } 2473 }
2473 nfs_set_verifier(dentry, 2474 nfs_set_verifier(dentry,
2474 nfs_save_change_attribute(d_inode(opendata->dir))); 2475 nfs_save_change_attribute(d_inode(opendata->dir)));
@@ -6782,13 +6783,26 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
6782 return false; 6783 return false;
6783} 6784}
6784 6785
6786static void
6787nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
6788{
6789}
6790
6791static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
6792 .rpc_call_done = &nfs4_bind_one_conn_to_session_done,
6793};
6794
6785/* 6795/*
6786 * nfs4_proc_bind_conn_to_session() 6796 * nfs4_proc_bind_one_conn_to_session()
6787 * 6797 *
6788 * The 4.1 client currently uses the same TCP connection for the 6798 * The 4.1 client currently uses the same TCP connection for the
6789 * fore and backchannel. 6799 * fore and backchannel.
6790 */ 6800 */
6791int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred) 6801static
6802int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
6803 struct rpc_xprt *xprt,
6804 struct nfs_client *clp,
6805 struct rpc_cred *cred)
6792{ 6806{
6793 int status; 6807 int status;
6794 struct nfs41_bind_conn_to_session_args args = { 6808 struct nfs41_bind_conn_to_session_args args = {
@@ -6803,6 +6817,14 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
6803 .rpc_resp = &res, 6817 .rpc_resp = &res,
6804 .rpc_cred = cred, 6818 .rpc_cred = cred,
6805 }; 6819 };
6820 struct rpc_task_setup task_setup_data = {
6821 .rpc_client = clnt,
6822 .rpc_xprt = xprt,
6823 .callback_ops = &nfs4_bind_one_conn_to_session_ops,
6824 .rpc_message = &msg,
6825 .flags = RPC_TASK_TIMEOUT,
6826 };
6827 struct rpc_task *task;
6806 6828
6807 dprintk("--> %s\n", __func__); 6829 dprintk("--> %s\n", __func__);
6808 6830
@@ -6810,7 +6832,16 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
6810 if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) 6832 if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
6811 args.dir = NFS4_CDFC4_FORE; 6833 args.dir = NFS4_CDFC4_FORE;
6812 6834
6813 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 6835 /* Do not set the backchannel flag unless this is clnt->cl_xprt */
6836 if (xprt != rcu_access_pointer(clnt->cl_xprt))
6837 args.dir = NFS4_CDFC4_FORE;
6838
6839 task = rpc_run_task(&task_setup_data);
6840 if (!IS_ERR(task)) {
6841 status = task->tk_status;
6842 rpc_put_task(task);
6843 } else
6844 status = PTR_ERR(task);
6814 trace_nfs4_bind_conn_to_session(clp, status); 6845 trace_nfs4_bind_conn_to_session(clp, status);
6815 if (status == 0) { 6846 if (status == 0) {
6816 if (memcmp(res.sessionid.data, 6847 if (memcmp(res.sessionid.data,
@@ -6837,6 +6868,31 @@ out:
6837 return status; 6868 return status;
6838} 6869}
6839 6870
6871struct rpc_bind_conn_calldata {
6872 struct nfs_client *clp;
6873 struct rpc_cred *cred;
6874};
6875
6876static int
6877nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt,
6878 struct rpc_xprt *xprt,
6879 void *calldata)
6880{
6881 struct rpc_bind_conn_calldata *p = calldata;
6882
6883 return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred);
6884}
6885
6886int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
6887{
6888 struct rpc_bind_conn_calldata data = {
6889 .clp = clp,
6890 .cred = cred,
6891 };
6892 return rpc_clnt_iterate_for_each_xprt(clp->cl_rpcclient,
6893 nfs4_proc_bind_conn_to_session_callback, &data);
6894}
6895
6840/* 6896/*
6841 * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map 6897 * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
6842 * and operations we'd like to see to enable certain features in the allow map 6898 * and operations we'd like to see to enable certain features in the allow map
@@ -7319,7 +7375,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
7319 args->bc_attrs.max_resp_sz = PAGE_SIZE; 7375 args->bc_attrs.max_resp_sz = PAGE_SIZE;
7320 args->bc_attrs.max_resp_sz_cached = 0; 7376 args->bc_attrs.max_resp_sz_cached = 0;
7321 args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; 7377 args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
7322 args->bc_attrs.max_reqs = 1; 7378 args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS;
7323 7379
7324 dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " 7380 dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
7325 "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", 7381 "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index e23366effcfb..332d06e64fa9 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -135,6 +135,43 @@ static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl,
135 return ERR_PTR(-ENOMEM); 135 return ERR_PTR(-ENOMEM);
136} 136}
137 137
138static void nfs4_lock_slot(struct nfs4_slot_table *tbl,
139 struct nfs4_slot *slot)
140{
141 u32 slotid = slot->slot_nr;
142
143 __set_bit(slotid, tbl->used_slots);
144 if (slotid > tbl->highest_used_slotid ||
145 tbl->highest_used_slotid == NFS4_NO_SLOT)
146 tbl->highest_used_slotid = slotid;
147 slot->generation = tbl->generation;
148}
149
150/*
151 * nfs4_try_to_lock_slot - Given a slot try to allocate it
152 *
153 * Note: must be called with the slot_tbl_lock held.
154 */
155bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
156{
157 if (nfs4_test_locked_slot(tbl, slot->slot_nr))
158 return false;
159 nfs4_lock_slot(tbl, slot);
160 return true;
161}
162
163/*
164 * nfs4_lookup_slot - Find a slot but don't allocate it
165 *
166 * Note: must be called with the slot_tbl_lock held.
167 */
168struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
169{
170 if (slotid <= tbl->max_slotid)
171 return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
172 return ERR_PTR(-E2BIG);
173}
174
138/* 175/*
139 * nfs4_alloc_slot - efficiently look for a free slot 176 * nfs4_alloc_slot - efficiently look for a free slot
140 * 177 *
@@ -153,18 +190,11 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
153 __func__, tbl->used_slots[0], tbl->highest_used_slotid, 190 __func__, tbl->used_slots[0], tbl->highest_used_slotid,
154 tbl->max_slotid + 1); 191 tbl->max_slotid + 1);
155 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); 192 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
156 if (slotid > tbl->max_slotid) 193 if (slotid <= tbl->max_slotid) {
157 goto out; 194 ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
158 ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); 195 if (!IS_ERR(ret))
159 if (IS_ERR(ret)) 196 nfs4_lock_slot(tbl, ret);
160 goto out; 197 }
161 __set_bit(slotid, tbl->used_slots);
162 if (slotid > tbl->highest_used_slotid ||
163 tbl->highest_used_slotid == NFS4_NO_SLOT)
164 tbl->highest_used_slotid = slotid;
165 ret->generation = tbl->generation;
166
167out:
168 dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n", 198 dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
169 __func__, tbl->used_slots[0], tbl->highest_used_slotid, 199 __func__, tbl->used_slots[0], tbl->highest_used_slotid,
170 !IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT); 200 !IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index e3ea2c5324d6..5b51298d1d03 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -77,6 +77,8 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
77 unsigned int max_reqs, const char *queue); 77 unsigned int max_reqs, const char *queue);
78extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); 78extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
79extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); 79extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
80extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
81extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
80extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); 82extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
81extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); 83extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
82bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, 84bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
@@ -88,6 +90,12 @@ static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
88 return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); 90 return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
89} 91}
90 92
93static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl,
94 u32 slotid)
95{
96 return !!test_bit(slotid, tbl->used_slots);
97}
98
91#if defined(CONFIG_NFS_V4_1) 99#if defined(CONFIG_NFS_V4_1)
92extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, 100extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
93 u32 target_highest_slotid); 101 u32 target_highest_slotid);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 81ac6480f9e7..4aaed890048f 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -606,12 +606,22 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
606 dprintk("%s: DS %s: trying address %s\n", 606 dprintk("%s: DS %s: trying address %s\n",
607 __func__, ds->ds_remotestr, da->da_remotestr); 607 __func__, ds->ds_remotestr, da->da_remotestr);
608 608
609 clp = get_v3_ds_connect(mds_srv->nfs_client, 609 if (!IS_ERR(clp)) {
610 struct xprt_create xprt_args = {
611 .ident = XPRT_TRANSPORT_TCP,
612 .net = clp->cl_net,
613 .dstaddr = (struct sockaddr *)&da->da_addr,
614 .addrlen = da->da_addrlen,
615 .servername = clp->cl_hostname,
616 };
617 /* Add this address as an alias */
618 rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
619 rpc_clnt_test_and_add_xprt, NULL);
620 } else
621 clp = get_v3_ds_connect(mds_srv->nfs_client,
610 (struct sockaddr *)&da->da_addr, 622 (struct sockaddr *)&da->da_addr,
611 da->da_addrlen, IPPROTO_TCP, 623 da->da_addrlen, IPPROTO_TCP,
612 timeo, retrans, au_flavor); 624 timeo, retrans, au_flavor);
613 if (!IS_ERR(clp))
614 break;
615 } 625 }
616 626
617 if (IS_ERR(clp)) { 627 if (IS_ERR(clp)) {