author     Linus Torvalds <torvalds@linux-foundation.org>   2008-10-16 18:39:20 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-10-16 18:39:20 -0400
commit     52ad096465d60ee7fdc99583f969a99a1166c7c3
tree       0cf33926ce8b5eb4f6675d3b1332b35e914f4846
parent     8cde1ad6683f8738ad71f788dca8ab4810cf5afe
parent     6925bac120097b823fc990c143b9789c21cc60b5
Merge git://git.linux-nfs.org/projects/trondmy/nfs-2.6
* git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (53 commits)
NFS: Fix a resolution problem with nfs_inode->cache_change_attribute
NFS: Fix the resolution problem with nfs_inode_attrs_need_update()
NFS: Changes to inode->i_nlinks must set the NFS_INO_INVALID_ATTR flag
RPC/RDMA: ensure connection attempt is complete before signalling.
RPC/RDMA: correct the reconnect timer backoff
RPC/RDMA: optionally emit useful transport info upon connect/disconnect.
RPC/RDMA: reformat a debug printk to keep lines together.
RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls.
RPC/RDMA: fix connect/reconnect resource leak.
RPC/RDMA: return a consistent error, when connect fails.
RPC/RDMA: adhere to protocol for unpadded client trailing write chunks.
RPC/RDMA: avoid an oops due to disconnect racing with async upcalls.
RPC/RDMA: maintain the RPC task bytes-sent statistic.
RPC/RDMA: suppress retransmit on RPC/RDMA clients.
RPC/RDMA: fix connection IRD/ORD setting
RPC/RDMA: support FRMR client memory registration.
RPC/RDMA: check selected memory registration mode at runtime.
RPC/RDMA: add data types and new FRMR memory registration enum.
RPC/RDMA: refactor the inline memory registration code.
NFS: fix nfs_parse_ip_address() corner case
...
-rw-r--r--  fs/nfs/client.c                  |   5
-rw-r--r--  fs/nfs/dir.c                     |  20
-rw-r--r--  fs/nfs/file.c                    |  18
-rw-r--r--  fs/nfs/inode.c                   | 183
-rw-r--r--  fs/nfs/internal.h                |  25
-rw-r--r--  fs/nfs/mount_clnt.c              |   3
-rw-r--r--  fs/nfs/namespace.c               |   7
-rw-r--r--  fs/nfs/nfs3acl.c                 |   2
-rw-r--r--  fs/nfs/nfs3proc.c                |  20
-rw-r--r--  fs/nfs/nfs4namespace.c           | 105
-rw-r--r--  fs/nfs/proc.c                    |  10
-rw-r--r--  fs/nfs/super.c                   | 126
-rw-r--r--  fs/nfs/unlink.c                  |   5
-rw-r--r--  fs/nfs/write.c                   |   3
-rw-r--r--  include/linux/nfs_fs.h           |  19
-rw-r--r--  include/linux/nfs_fs_sb.h        |   1
-rw-r--r--  include/linux/nfs_mount.h        |   4
-rw-r--r--  include/linux/nfs_xdr.h          |  11
-rw-r--r--  include/linux/sunrpc/xprtrdma.h  |   4
-rw-r--r--  net/sunrpc/clnt.c                |   4
-rw-r--r--  net/sunrpc/rpcb_clnt.c           |  40
-rw-r--r--  net/sunrpc/xprt.c                |  12
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c   |  29
-rw-r--r--  net/sunrpc/xprtrdma/transport.c  |  41
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c      | 741
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h  |  17
26 files changed, 955 insertions, 500 deletions
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5ee23e7058b3..7547600b6174 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
675 | server->nfs_client = clp; | 675 | server->nfs_client = clp; |
676 | 676 | ||
677 | /* Initialise the client representation from the mount data */ | 677 | /* Initialise the client representation from the mount data */ |
678 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 678 | server->flags = data->flags; |
679 | 679 | ||
680 | if (data->rsize) | 680 | if (data->rsize) |
681 | server->rsize = nfs_block_size(data->rsize, NULL); | 681 | server->rsize = nfs_block_size(data->rsize, NULL); |
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void) | |||
850 | INIT_LIST_HEAD(&server->client_link); | 850 | INIT_LIST_HEAD(&server->client_link); |
851 | INIT_LIST_HEAD(&server->master_link); | 851 | INIT_LIST_HEAD(&server->master_link); |
852 | 852 | ||
853 | init_waitqueue_head(&server->active_wq); | ||
854 | atomic_set(&server->active, 0); | 853 | atomic_set(&server->active, 0); |
855 | 854 | ||
856 | server->io_stats = nfs_alloc_iostats(); | 855 | server->io_stats = nfs_alloc_iostats(); |
@@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1073 | goto error; | 1072 | goto error; |
1074 | 1073 | ||
1075 | /* Initialise the client representation from the mount data */ | 1074 | /* Initialise the client representation from the mount data */ |
1076 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1075 | server->flags = data->flags; |
1077 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1076 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
1078 | 1077 | ||
1079 | if (data->rsize) | 1078 | if (data->rsize) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 74f92b717f78..2ab70d46ecbc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -156,6 +156,7 @@ typedef struct { | |||
156 | decode_dirent_t decode; | 156 | decode_dirent_t decode; |
157 | int plus; | 157 | int plus; |
158 | unsigned long timestamp; | 158 | unsigned long timestamp; |
159 | unsigned long gencount; | ||
159 | int timestamp_valid; | 160 | int timestamp_valid; |
160 | } nfs_readdir_descriptor_t; | 161 | } nfs_readdir_descriptor_t; |
161 | 162 | ||
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
177 | struct file *file = desc->file; | 178 | struct file *file = desc->file; |
178 | struct inode *inode = file->f_path.dentry->d_inode; | 179 | struct inode *inode = file->f_path.dentry->d_inode; |
179 | struct rpc_cred *cred = nfs_file_cred(file); | 180 | struct rpc_cred *cred = nfs_file_cred(file); |
180 | unsigned long timestamp; | 181 | unsigned long timestamp, gencount; |
181 | int error; | 182 | int error; |
182 | 183 | ||
183 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", | 184 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", |
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
186 | 187 | ||
187 | again: | 188 | again: |
188 | timestamp = jiffies; | 189 | timestamp = jiffies; |
190 | gencount = nfs_inc_attr_generation_counter(); | ||
189 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, | 191 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, |
190 | NFS_SERVER(inode)->dtsize, desc->plus); | 192 | NFS_SERVER(inode)->dtsize, desc->plus); |
191 | if (error < 0) { | 193 | if (error < 0) { |
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
199 | goto error; | 201 | goto error; |
200 | } | 202 | } |
201 | desc->timestamp = timestamp; | 203 | desc->timestamp = timestamp; |
204 | desc->gencount = gencount; | ||
202 | desc->timestamp_valid = 1; | 205 | desc->timestamp_valid = 1; |
203 | SetPageUptodate(page); | 206 | SetPageUptodate(page); |
204 | /* Ensure consistent page alignment of the data. | 207 | /* Ensure consistent page alignment of the data. |
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) | |||
224 | if (IS_ERR(p)) | 227 | if (IS_ERR(p)) |
225 | return PTR_ERR(p); | 228 | return PTR_ERR(p); |
226 | desc->ptr = p; | 229 | desc->ptr = p; |
227 | if (desc->timestamp_valid) | 230 | if (desc->timestamp_valid) { |
228 | desc->entry->fattr->time_start = desc->timestamp; | 231 | desc->entry->fattr->time_start = desc->timestamp; |
229 | else | 232 | desc->entry->fattr->gencount = desc->gencount; |
233 | } else | ||
230 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; | 234 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; |
231 | return 0; | 235 | return 0; |
232 | } | 236 | } |
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
471 | struct rpc_cred *cred = nfs_file_cred(file); | 475 | struct rpc_cred *cred = nfs_file_cred(file); |
472 | struct page *page = NULL; | 476 | struct page *page = NULL; |
473 | int status; | 477 | int status; |
474 | unsigned long timestamp; | 478 | unsigned long timestamp, gencount; |
475 | 479 | ||
476 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 480 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
477 | (unsigned long long)*desc->dir_cookie); | 481 | (unsigned long long)*desc->dir_cookie); |
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
482 | goto out; | 486 | goto out; |
483 | } | 487 | } |
484 | timestamp = jiffies; | 488 | timestamp = jiffies; |
489 | gencount = nfs_inc_attr_generation_counter(); | ||
485 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, | 490 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, |
486 | *desc->dir_cookie, page, | 491 | *desc->dir_cookie, page, |
487 | NFS_SERVER(inode)->dtsize, | 492 | NFS_SERVER(inode)->dtsize, |
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
490 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 495 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
491 | if (status >= 0) { | 496 | if (status >= 0) { |
492 | desc->timestamp = timestamp; | 497 | desc->timestamp = timestamp; |
498 | desc->gencount = gencount; | ||
493 | desc->timestamp_valid = 1; | 499 | desc->timestamp_valid = 1; |
494 | if ((status = dir_decode(desc)) == 0) | 500 | if ((status = dir_decode(desc)) == 0) |
495 | desc->entry->prev_cookie = *desc->dir_cookie; | 501 | desc->entry->prev_cookie = *desc->dir_cookie; |
@@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
655 | */ | 661 | */ |
656 | void nfs_force_lookup_revalidate(struct inode *dir) | 662 | void nfs_force_lookup_revalidate(struct inode *dir) |
657 | { | 663 | { |
658 | NFS_I(dir)->cache_change_attribute = jiffies; | 664 | NFS_I(dir)->cache_change_attribute++; |
659 | } | 665 | } |
660 | 666 | ||
661 | /* | 667 | /* |
@@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | |||
667 | { | 673 | { |
668 | if (IS_ROOT(dentry)) | 674 | if (IS_ROOT(dentry)) |
669 | return 1; | 675 | return 1; |
676 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | ||
677 | return 0; | ||
670 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) | 678 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) |
671 | return 0; | 679 | return 0; |
672 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ | 680 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ |
@@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | |||
750 | /* Don't revalidate a negative dentry if we're creating a new file */ | 758 | /* Don't revalidate a negative dentry if we're creating a new file */ |
751 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) | 759 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) |
752 | return 0; | 760 | return 0; |
761 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) | ||
762 | return 1; | ||
753 | return !nfs_check_verifier(dir, dentry); | 763 | return !nfs_check_verifier(dir, dentry); |
754 | } | 764 | } |
755 | 765 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 78460657f5cb..d319b49f8f06 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
188 | /* origin == SEEK_END => we must revalidate the cached file length */ | 188 | /* origin == SEEK_END => we must revalidate the cached file length */ |
189 | if (origin == SEEK_END) { | 189 | if (origin == SEEK_END) { |
190 | struct inode *inode = filp->f_mapping->host; | 190 | struct inode *inode = filp->f_mapping->host; |
191 | |||
191 | int retval = nfs_revalidate_file_size(inode, filp); | 192 | int retval = nfs_revalidate_file_size(inode, filp); |
192 | if (retval < 0) | 193 | if (retval < 0) |
193 | return (loff_t)retval; | 194 | return (loff_t)retval; |
194 | } | 195 | |
195 | lock_kernel(); /* BKL needed? */ | 196 | spin_lock(&inode->i_lock); |
196 | loff = generic_file_llseek_unlocked(filp, offset, origin); | 197 | loff = generic_file_llseek_unlocked(filp, offset, origin); |
197 | unlock_kernel(); | 198 | spin_unlock(&inode->i_lock); |
199 | } else | ||
200 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
198 | return loff; | 201 | return loff; |
199 | } | 202 | } |
200 | 203 | ||
@@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
699 | filp->f_path.dentry->d_name.name, | 702 | filp->f_path.dentry->d_name.name, |
700 | fl->fl_type, fl->fl_flags); | 703 | fl->fl_type, fl->fl_flags); |
701 | 704 | ||
702 | /* | ||
703 | * No BSD flocks over NFS allowed. | ||
704 | * Note: we could try to fake a POSIX lock request here by | ||
705 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
706 | * Not sure whether that would be unique, though, or whether | ||
707 | * that would break in other places. | ||
708 | */ | ||
709 | if (!(fl->fl_flags & FL_FLOCK)) | 705 | if (!(fl->fl_flags & FL_FLOCK)) |
710 | return -ENOLCK; | 706 | return -ENOLCK; |
711 | 707 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52daefa2f521..b9195c02a863 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
305 | init_special_inode(inode, inode->i_mode, fattr->rdev); | 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); |
306 | 306 | ||
307 | nfsi->read_cache_jiffies = fattr->time_start; | 307 | nfsi->read_cache_jiffies = fattr->time_start; |
308 | nfsi->last_updated = now; | 308 | nfsi->attr_gencount = fattr->gencount; |
309 | nfsi->cache_change_attribute = now; | ||
310 | inode->i_atime = fattr->atime; | 309 | inode->i_atime = fattr->atime; |
311 | inode->i_mtime = fattr->mtime; | 310 | inode->i_mtime = fattr->mtime; |
312 | inode->i_ctime = fattr->ctime; | 311 | inode->i_ctime = fattr->ctime; |
@@ -453,6 +452,7 @@ out_big: | |||
453 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | 452 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) |
454 | { | 453 | { |
455 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { | 454 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { |
455 | spin_lock(&inode->i_lock); | ||
456 | if ((attr->ia_valid & ATTR_MODE) != 0) { | 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { |
457 | int mode = attr->ia_mode & S_IALLUGO; | 457 | int mode = attr->ia_mode & S_IALLUGO; |
458 | mode |= inode->i_mode & ~S_IALLUGO; | 458 | mode |= inode->i_mode & ~S_IALLUGO; |
@@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
462 | inode->i_uid = attr->ia_uid; | 462 | inode->i_uid = attr->ia_uid; |
463 | if ((attr->ia_valid & ATTR_GID) != 0) | 463 | if ((attr->ia_valid & ATTR_GID) != 0) |
464 | inode->i_gid = attr->ia_gid; | 464 | inode->i_gid = attr->ia_gid; |
465 | spin_lock(&inode->i_lock); | ||
466 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 465 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
467 | spin_unlock(&inode->i_lock); | 466 | spin_unlock(&inode->i_lock); |
468 | } | 467 | } |
@@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
472 | } | 471 | } |
473 | } | 472 | } |
474 | 473 | ||
475 | static int nfs_wait_schedule(void *word) | ||
476 | { | ||
477 | if (signal_pending(current)) | ||
478 | return -ERESTARTSYS; | ||
479 | schedule(); | ||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * Wait for the inode to get unlocked. | ||
485 | */ | ||
486 | static int nfs_wait_on_inode(struct inode *inode) | ||
487 | { | ||
488 | struct nfs_inode *nfsi = NFS_I(inode); | ||
489 | int error; | ||
490 | |||
491 | error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, | ||
492 | nfs_wait_schedule, TASK_KILLABLE); | ||
493 | |||
494 | return error; | ||
495 | } | ||
496 | |||
497 | static void nfs_wake_up_inode(struct inode *inode) | ||
498 | { | ||
499 | struct nfs_inode *nfsi = NFS_I(inode); | ||
500 | |||
501 | clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); | ||
502 | smp_mb__after_clear_bit(); | ||
503 | wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); | ||
504 | } | ||
505 | |||
506 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 474 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
507 | { | 475 | { |
508 | struct inode *inode = dentry->d_inode; | 476 | struct inode *inode = dentry->d_inode; |
@@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
697 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", | 665 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", |
698 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 666 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
699 | 667 | ||
700 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
701 | if (is_bad_inode(inode)) | 668 | if (is_bad_inode(inode)) |
702 | goto out_nowait; | 669 | goto out; |
703 | if (NFS_STALE(inode)) | 670 | if (NFS_STALE(inode)) |
704 | goto out_nowait; | ||
705 | |||
706 | status = nfs_wait_on_inode(inode); | ||
707 | if (status < 0) | ||
708 | goto out; | 671 | goto out; |
709 | 672 | ||
710 | status = -ESTALE; | ||
711 | if (NFS_STALE(inode)) | 673 | if (NFS_STALE(inode)) |
712 | goto out; | 674 | goto out; |
713 | 675 | ||
676 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
714 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); | 677 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); |
715 | if (status != 0) { | 678 | if (status != 0) { |
716 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 679 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
@@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
724 | goto out; | 687 | goto out; |
725 | } | 688 | } |
726 | 689 | ||
727 | spin_lock(&inode->i_lock); | 690 | status = nfs_refresh_inode(inode, &fattr); |
728 | status = nfs_update_inode(inode, &fattr); | ||
729 | if (status) { | 691 | if (status) { |
730 | spin_unlock(&inode->i_lock); | ||
731 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 692 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
732 | inode->i_sb->s_id, | 693 | inode->i_sb->s_id, |
733 | (long long)NFS_FILEID(inode), status); | 694 | (long long)NFS_FILEID(inode), status); |
734 | goto out; | 695 | goto out; |
735 | } | 696 | } |
736 | spin_unlock(&inode->i_lock); | ||
737 | 697 | ||
738 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 698 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
739 | nfs_zap_acl_cache(inode); | 699 | nfs_zap_acl_cache(inode); |
@@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
743 | (long long)NFS_FILEID(inode)); | 703 | (long long)NFS_FILEID(inode)); |
744 | 704 | ||
745 | out: | 705 | out: |
746 | nfs_wake_up_inode(inode); | ||
747 | |||
748 | out_nowait: | ||
749 | return status; | 706 | return status; |
750 | } | 707 | } |
751 | 708 | ||
@@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
908 | return -EIO; | 865 | return -EIO; |
909 | } | 866 | } |
910 | 867 | ||
911 | /* Do atomic weak cache consistency updates */ | ||
912 | nfs_wcc_update_inode(inode, fattr); | ||
913 | |||
914 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 868 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
915 | nfsi->change_attr != fattr->change_attr) | 869 | nfsi->change_attr != fattr->change_attr) |
916 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 870 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
@@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
939 | 893 | ||
940 | if (invalid != 0) | 894 | if (invalid != 0) |
941 | nfsi->cache_validity |= invalid; | 895 | nfsi->cache_validity |= invalid; |
942 | else | ||
943 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | ||
944 | | NFS_INO_INVALID_ATIME | ||
945 | | NFS_INO_REVAL_PAGECACHE); | ||
946 | 896 | ||
947 | nfsi->read_cache_jiffies = fattr->time_start; | 897 | nfsi->read_cache_jiffies = fattr->time_start; |
948 | return 0; | 898 | return 0; |
949 | } | 899 | } |
950 | 900 | ||
901 | static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
902 | { | ||
903 | return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; | ||
904 | } | ||
905 | |||
906 | static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
907 | { | ||
908 | return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); | ||
909 | } | ||
910 | |||
911 | static unsigned long nfs_attr_generation_counter; | ||
912 | |||
913 | static unsigned long nfs_read_attr_generation_counter(void) | ||
914 | { | ||
915 | smp_rmb(); | ||
916 | return nfs_attr_generation_counter; | ||
917 | } | ||
918 | |||
919 | unsigned long nfs_inc_attr_generation_counter(void) | ||
920 | { | ||
921 | unsigned long ret; | ||
922 | smp_rmb(); | ||
923 | ret = ++nfs_attr_generation_counter; | ||
924 | smp_wmb(); | ||
925 | return ret; | ||
926 | } | ||
927 | |||
928 | void nfs_fattr_init(struct nfs_fattr *fattr) | ||
929 | { | ||
930 | fattr->valid = 0; | ||
931 | fattr->time_start = jiffies; | ||
932 | fattr->gencount = nfs_inc_attr_generation_counter(); | ||
933 | } | ||
934 | |||
935 | /** | ||
936 | * nfs_inode_attrs_need_update - check if the inode attributes need updating | ||
937 | * @inode - pointer to inode | ||
938 | * @fattr - attributes | ||
939 | * | ||
940 | * Attempt to divine whether or not an RPC call reply carrying stale | ||
941 | * attributes got scheduled after another call carrying updated ones. | ||
942 | * | ||
943 | * To do so, the function first assumes that a more recent ctime means | ||
944 | * that the attributes in fattr are newer, however it also attempt to | ||
945 | * catch the case where ctime either didn't change, or went backwards | ||
946 | * (if someone reset the clock on the server) by looking at whether | ||
947 | * or not this RPC call was started after the inode was last updated. | ||
948 | * Note also the check for wraparound of 'attr_gencount' | ||
949 | * | ||
950 | * The function returns 'true' if it thinks the attributes in 'fattr' are | ||
951 | * more recent than the ones cached in the inode. | ||
952 | * | ||
953 | */ | ||
954 | static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
955 | { | ||
956 | const struct nfs_inode *nfsi = NFS_I(inode); | ||
957 | |||
958 | return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || | ||
959 | nfs_ctime_need_update(inode, fattr) || | ||
960 | nfs_size_need_update(inode, fattr) || | ||
961 | ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); | ||
962 | } | ||
963 | |||
964 | static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
965 | { | ||
966 | if (nfs_inode_attrs_need_update(inode, fattr)) | ||
967 | return nfs_update_inode(inode, fattr); | ||
968 | return nfs_check_inode_attributes(inode, fattr); | ||
969 | } | ||
970 | |||
951 | /** | 971 | /** |
952 | * nfs_refresh_inode - try to update the inode attribute cache | 972 | * nfs_refresh_inode - try to update the inode attribute cache |
953 | * @inode - pointer to inode | 973 | * @inode - pointer to inode |
@@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
960 | */ | 980 | */ |
961 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | 981 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) |
962 | { | 982 | { |
963 | struct nfs_inode *nfsi = NFS_I(inode); | ||
964 | int status; | 983 | int status; |
965 | 984 | ||
966 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 985 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) |
967 | return 0; | 986 | return 0; |
968 | spin_lock(&inode->i_lock); | 987 | spin_lock(&inode->i_lock); |
969 | if (time_after(fattr->time_start, nfsi->last_updated)) | 988 | status = nfs_refresh_inode_locked(inode, fattr); |
970 | status = nfs_update_inode(inode, fattr); | ||
971 | else | ||
972 | status = nfs_check_inode_attributes(inode, fattr); | ||
973 | |||
974 | spin_unlock(&inode->i_lock); | 989 | spin_unlock(&inode->i_lock); |
975 | return status; | 990 | return status; |
976 | } | 991 | } |
977 | 992 | ||
993 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
994 | { | ||
995 | struct nfs_inode *nfsi = NFS_I(inode); | ||
996 | |||
997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | ||
998 | if (S_ISDIR(inode->i_mode)) | ||
999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
1000 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | ||
1001 | return 0; | ||
1002 | return nfs_refresh_inode_locked(inode, fattr); | ||
1003 | } | ||
1004 | |||
978 | /** | 1005 | /** |
979 | * nfs_post_op_update_inode - try to update the inode attribute cache | 1006 | * nfs_post_op_update_inode - try to update the inode attribute cache |
980 | * @inode - pointer to inode | 1007 | * @inode - pointer to inode |
@@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
991 | */ | 1018 | */ |
992 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 1019 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
993 | { | 1020 | { |
994 | struct nfs_inode *nfsi = NFS_I(inode); | 1021 | int status; |
995 | 1022 | ||
996 | spin_lock(&inode->i_lock); | 1023 | spin_lock(&inode->i_lock); |
997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1024 | status = nfs_post_op_update_inode_locked(inode, fattr); |
998 | if (S_ISDIR(inode->i_mode)) | ||
999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
1000 | spin_unlock(&inode->i_lock); | 1025 | spin_unlock(&inode->i_lock); |
1001 | return nfs_refresh_inode(inode, fattr); | 1026 | return status; |
1002 | } | 1027 | } |
1003 | 1028 | ||
1004 | /** | 1029 | /** |
@@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1014 | */ | 1039 | */ |
1015 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) | 1040 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) |
1016 | { | 1041 | { |
1042 | int status; | ||
1043 | |||
1044 | spin_lock(&inode->i_lock); | ||
1045 | /* Don't do a WCC update if these attributes are already stale */ | ||
1046 | if ((fattr->valid & NFS_ATTR_FATTR) == 0 || | ||
1047 | !nfs_inode_attrs_need_update(inode, fattr)) { | ||
1048 | fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); | ||
1049 | goto out_noforce; | ||
1050 | } | ||
1017 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 1051 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
1018 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { | 1052 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { |
1019 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | 1053 | fattr->pre_change_attr = NFS_I(inode)->change_attr; |
@@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
1026 | fattr->pre_size = i_size_read(inode); | 1060 | fattr->pre_size = i_size_read(inode); |
1027 | fattr->valid |= NFS_ATTR_WCC; | 1061 | fattr->valid |= NFS_ATTR_WCC; |
1028 | } | 1062 | } |
1029 | return nfs_post_op_update_inode(inode, fattr); | 1063 | out_noforce: |
1064 | status = nfs_post_op_update_inode_locked(inode, fattr); | ||
1065 | spin_unlock(&inode->i_lock); | ||
1066 | return status; | ||
1030 | } | 1067 | } |
1031 | 1068 | ||
1032 | /* | 1069 | /* |
@@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1092 | } | 1129 | } |
1093 | /* If ctime has changed we should definitely clear access+acl caches */ | 1130 | /* If ctime has changed we should definitely clear access+acl caches */ |
1094 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | 1131 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) |
1095 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1132 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1096 | } else if (nfsi->change_attr != fattr->change_attr) { | 1133 | } else if (nfsi->change_attr != fattr->change_attr) { |
1097 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1134 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
1098 | inode->i_sb->s_id, inode->i_ino); | 1135 | inode->i_sb->s_id, inode->i_ino); |
@@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1126 | inode->i_gid != fattr->gid) | 1163 | inode->i_gid != fattr->gid) |
1127 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1164 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1128 | 1165 | ||
1166 | if (inode->i_nlink != fattr->nlink) | ||
1167 | invalid |= NFS_INO_INVALID_ATTR; | ||
1168 | |||
1129 | inode->i_mode = fattr->mode; | 1169 | inode->i_mode = fattr->mode; |
1130 | inode->i_nlink = fattr->nlink; | 1170 | inode->i_nlink = fattr->nlink; |
1131 | inode->i_uid = fattr->uid; | 1171 | inode->i_uid = fattr->uid; |
@@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1145 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1185 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
1146 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1186 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
1147 | nfsi->attrtimeo_timestamp = now; | 1187 | nfsi->attrtimeo_timestamp = now; |
1148 | nfsi->last_updated = now; | 1188 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); |
1149 | } else { | 1189 | } else { |
1150 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { | 1190 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
1151 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1191 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) |
1152 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1192 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); |
1153 | nfsi->attrtimeo_timestamp = now; | 1193 | nfsi->attrtimeo_timestamp = now; |
1154 | } | 1194 | } |
1155 | /* | ||
1156 | * Avoid jiffy wraparound issues with nfsi->last_updated | ||
1157 | */ | ||
1158 | if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) | ||
1159 | nfsi->last_updated = nfsi->read_cache_jiffies; | ||
1160 | } | 1195 | } |
1161 | invalid &= ~NFS_INO_INVALID_ATTR; | 1196 | invalid &= ~NFS_INO_INVALID_ATTR; |
1162 | /* Don't invalidate the data if we were to blame */ | 1197 | /* Don't invalidate the data if we were to blame */ |
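[Editor's note: the attribute generation counter introduced above replaces the jiffies-based last_updated heuristic, and nfs_inode_attrs_need_update() compares counters via a signed difference so the test stays correct across wraparound. The following is a standalone userspace sketch of that comparison, with invented names; it is not kernel code. The kernel casts each operand before subtracting, while this sketch subtracts first and then casts, which behaves the same as long as the two values are less than LONG_MAX apart.]

#include <limits.h>
#include <stdio.h>

/* Wraparound-safe "is generation a newer than generation b?" test. */
static int gen_after(unsigned long a, unsigned long b)
{
	/* The unsigned subtraction wraps modulo ULONG_MAX + 1, so the
	 * signed interpretation of the difference is positive exactly
	 * when a is "ahead" of b by less than half the counter range. */
	return (long)(a - b) > 0;
}

int main(void)
{
	unsigned long old = ULONG_MAX - 1, new = 1;	/* counter has wrapped */

	printf("%d\n", gen_after(new, old));	/* prints 1: new is later */
	printf("%d\n", gen_after(old, new));	/* prints 0 */
	return 0;
}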
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 24241fcbb98d..d212ee41caf2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *); | |||
153 | void nfs_zap_acl_cache(struct inode *inode); | 153 | void nfs_zap_acl_cache(struct inode *inode); |
154 | 154 | ||
155 | /* super.c */ | 155 | /* super.c */ |
156 | void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); | ||
156 | extern struct file_system_type nfs_xdev_fs_type; | 157 | extern struct file_system_type nfs_xdev_fs_type; |
157 | #ifdef CONFIG_NFS_V4 | 158 | #ifdef CONFIG_NFS_V4 |
158 | extern struct file_system_type nfs4_xdev_fs_type; | 159 | extern struct file_system_type nfs4_xdev_fs_type; |
@@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat; | |||
163 | 164 | ||
164 | extern int __init register_nfs_fs(void); | 165 | extern int __init register_nfs_fs(void); |
165 | extern void __exit unregister_nfs_fs(void); | 166 | extern void __exit unregister_nfs_fs(void); |
166 | extern void nfs_sb_active(struct nfs_server *server); | 167 | extern void nfs_sb_active(struct super_block *sb); |
167 | extern void nfs_sb_deactive(struct nfs_server *server); | 168 | extern void nfs_sb_deactive(struct super_block *sb); |
168 | 169 | ||
169 | /* namespace.c */ | 170 | /* namespace.c */ |
170 | extern char *nfs_path(const char *base, | 171 | extern char *nfs_path(const char *base, |
@@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
276 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 277 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
277 | } | 278 | } |
278 | 279 | ||
280 | #define IPV6_SCOPE_DELIMITER '%' | ||
281 | |||
282 | /* | ||
283 | * Set the port number in an address. Be agnostic about the address | ||
284 | * family. | ||
285 | */ | ||
286 | static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
287 | { | ||
288 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
289 | struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; | ||
290 | |||
291 | switch (sap->sa_family) { | ||
292 | case AF_INET: | ||
293 | ap->sin_port = htons(port); | ||
294 | break; | ||
295 | case AF_INET6: | ||
296 | ap6->sin6_port = htons(port); | ||
297 | break; | ||
298 | } | ||
299 | } | ||
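[Editor's note: nfs_set_port() and the now-exported nfs_parse_ip_address() let the mount path treat IPv4 and IPv6 endpoints through a single struct sockaddr pointer. Below is a minimal userspace analogue of that pattern using only the standard socket API; set_port() and parse_addr() are invented names for illustration, not the kernel helpers.]

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>

/* Set the port without caring which family the parse produced. */
static void set_port(struct sockaddr *sap, unsigned short port)
{
	switch (sap->sa_family) {
	case AF_INET:
		((struct sockaddr_in *)sap)->sin_port = htons(port);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
		break;
	}
}

/* Parse a presentation-format address into a sockaddr_storage,
 * trying IPv4 first and falling back to IPv6. */
static int parse_addr(const char *s, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)ss;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

	memset(ss, 0, sizeof(*ss));
	if (inet_pton(AF_INET, s, &sin->sin_addr) == 1) {
		sin->sin_family = AF_INET;
		return 0;
	}
	if (inet_pton(AF_INET6, s, &sin6->sin6_addr) == 1) {
		sin6->sin6_family = AF_INET6;
		return 0;
	}
	return -1;
}

int main(void)
{
	struct sockaddr_storage ss;

	if (parse_addr("fe80::1", &ss) == 0) {
		set_port((struct sockaddr *)&ss, 2049);	/* NFS port */
		printf("parsed family %d\n", ss.ss_family);
	}
	return 0;
}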
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 779d2eb649c5..086a6830d785 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/sched.h> | 15 | #include <linux/sunrpc/sched.h> |
16 | #include <linux/nfs_fs.h> | 16 | #include <linux/nfs_fs.h> |
17 | #include "internal.h" | ||
17 | 18 | ||
18 | #ifdef RPC_DEBUG | 19 | #ifdef RPC_DEBUG |
19 | # define NFSDBG_FACILITY NFSDBG_MOUNT | 20 | # define NFSDBG_FACILITY NFSDBG_MOUNT |
@@ -98,7 +99,7 @@ out_call_err: | |||
98 | 99 | ||
99 | out_mnt_err: | 100 | out_mnt_err: |
100 | dprintk("NFS: MNT server returned result %d\n", result.status); | 101 | dprintk("NFS: MNT server returned result %d\n", result.status); |
101 | status = -EACCES; | 102 | status = nfs_stat_to_errno(result.status); |
102 | goto out; | 103 | goto out; |
103 | } | 104 | } |
104 | 105 | ||
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 66df08dd1caf..64a288ee046d 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
105 | 105 | ||
106 | dprintk("--> nfs_follow_mountpoint()\n"); | 106 | dprintk("--> nfs_follow_mountpoint()\n"); |
107 | 107 | ||
108 | BUG_ON(IS_ROOT(dentry)); | 108 | err = -ESTALE; |
109 | if (IS_ROOT(dentry)) | ||
110 | goto out_err; | ||
111 | |||
109 | dprintk("%s: enter\n", __func__); | 112 | dprintk("%s: enter\n", __func__); |
110 | dput(nd->path.dentry); | 113 | dput(nd->path.dentry); |
111 | nd->path.dentry = dget(dentry); | 114 | nd->path.dentry = dget(dentry); |
@@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
189 | struct nfs_clone_mount *mountdata) | 192 | struct nfs_clone_mount *mountdata) |
190 | { | 193 | { |
191 | #ifdef CONFIG_NFS_V4 | 194 | #ifdef CONFIG_NFS_V4 |
192 | struct vfsmount *mnt = NULL; | 195 | struct vfsmount *mnt = ERR_PTR(-EINVAL); |
193 | switch (server->nfs_client->rpc_ops->version) { | 196 | switch (server->nfs_client->rpc_ops->version) { |
194 | case 2: | 197 | case 2: |
195 | case 3: | 198 | case 3: |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 423842f51ac9..cef62557c87d 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
229 | 229 | ||
230 | dprintk("NFS call getacl\n"); | 230 | dprintk("NFS call getacl\n"); |
231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; | 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; |
232 | nfs_fattr_init(&fattr); | ||
232 | status = rpc_call_sync(server->client_acl, &msg, 0); | 233 | status = rpc_call_sync(server->client_acl, &msg, 0); |
233 | dprintk("NFS reply getacl: %d\n", status); | 234 | dprintk("NFS reply getacl: %d\n", status); |
234 | 235 | ||
@@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
322 | 323 | ||
323 | dprintk("NFS call setacl\n"); | 324 | dprintk("NFS call setacl\n"); |
324 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 325 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
326 | nfs_fattr_init(&fattr); | ||
325 | status = rpc_call_sync(server->client_acl, &msg, 0); | 327 | status = rpc_call_sync(server->client_acl, &msg, 0); |
326 | nfs_access_zap_cache(inode); | 328 | nfs_access_zap_cache(inode); |
327 | nfs_zap_acl_cache(inode); | 329 | nfs_zap_acl_cache(inode); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1e750e4574a9..c55be7a7679e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
699 | } | 699 | } |
700 | 700 | ||
701 | static int | 701 | static int |
702 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | 702 | do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, |
703 | struct nfs_fsinfo *info) | 703 | struct nfs_fsinfo *info) |
704 | { | 704 | { |
705 | struct rpc_message msg = { | 705 | struct rpc_message msg = { |
@@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
711 | 711 | ||
712 | dprintk("NFS call fsinfo\n"); | 712 | dprintk("NFS call fsinfo\n"); |
713 | nfs_fattr_init(info->fattr); | 713 | nfs_fattr_init(info->fattr); |
714 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 714 | status = rpc_call_sync(client, &msg, 0); |
715 | dprintk("NFS reply fsinfo: %d\n", status); | 715 | dprintk("NFS reply fsinfo: %d\n", status); |
716 | return status; | 716 | return status; |
717 | } | 717 | } |
718 | 718 | ||
719 | /* | ||
720 | * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via | ||
721 | * nfs_create_server | ||
722 | */ | ||
723 | static int | ||
724 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | ||
725 | struct nfs_fsinfo *info) | ||
726 | { | ||
727 | int status; | ||
728 | |||
729 | status = do_proc_fsinfo(server->client, fhandle, info); | ||
730 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
731 | status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); | ||
732 | return status; | ||
733 | } | ||
734 | |||
719 | static int | 735 | static int |
720 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 736 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
721 | struct nfs_pathconf *info) | 737 | struct nfs_pathconf *info) |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index b112857301f7..30befc39b3c6 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, | |||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
97 | * Check if the string represents a "valid" IPv4 address | 97 | char *page, char *page2, |
98 | */ | 98 | const struct nfs4_fs_location *location) |
99 | static inline int valid_ipaddr4(const char *buf) | ||
100 | { | 99 | { |
101 | int rc, count, in[4]; | 100 | struct vfsmount *mnt = ERR_PTR(-ENOENT); |
102 | 101 | char *mnt_path; | |
103 | rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); | 102 | int page2len; |
104 | if (rc != 4) | 103 | unsigned int s; |
105 | return -EINVAL; | 104 | |
106 | for (count = 0; count < 4; count++) { | 105 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); |
107 | if (in[count] > 255) | 106 | if (IS_ERR(mnt_path)) |
108 | return -EINVAL; | 107 | return mnt; |
108 | mountdata->mnt_path = mnt_path; | ||
109 | page2 += strlen(mnt_path) + 1; | ||
110 | page2len = PAGE_SIZE - strlen(mnt_path) - 1; | ||
111 | |||
112 | for (s = 0; s < location->nservers; s++) { | ||
113 | const struct nfs4_string *buf = &location->servers[s]; | ||
114 | struct sockaddr_storage addr; | ||
115 | |||
116 | if (buf->len <= 0 || buf->len >= PAGE_SIZE) | ||
117 | continue; | ||
118 | |||
119 | mountdata->addr = (struct sockaddr *)&addr; | ||
120 | |||
121 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) | ||
122 | continue; | ||
123 | nfs_parse_ip_address(buf->data, buf->len, | ||
124 | mountdata->addr, &mountdata->addrlen); | ||
125 | if (mountdata->addr->sa_family == AF_UNSPEC) | ||
126 | continue; | ||
127 | nfs_set_port(mountdata->addr, NFS_PORT); | ||
128 | |||
129 | strncpy(page2, buf->data, page2len); | ||
130 | page2[page2len] = '\0'; | ||
131 | mountdata->hostname = page2; | ||
132 | |||
133 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
134 | mountdata->hostname, | ||
135 | mountdata->mnt_path); | ||
136 | |||
137 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); | ||
138 | if (!IS_ERR(mnt)) | ||
139 | break; | ||
109 | } | 140 | } |
110 | return 0; | 141 | return mnt; |
111 | } | 142 | } |
112 | 143 | ||
113 | /** | 144 | /** |
@@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
128 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, | 159 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, |
129 | }; | 160 | }; |
130 | char *page = NULL, *page2 = NULL; | 161 | char *page = NULL, *page2 = NULL; |
131 | unsigned int s; | ||
132 | int loc, error; | 162 | int loc, error; |
133 | 163 | ||
134 | if (locations == NULL || locations->nlocations <= 0) | 164 | if (locations == NULL || locations->nlocations <= 0) |
@@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
152 | goto out; | 182 | goto out; |
153 | } | 183 | } |
154 | 184 | ||
155 | loc = 0; | 185 | for (loc = 0; loc < locations->nlocations; loc++) { |
156 | while (loc < locations->nlocations && IS_ERR(mnt)) { | ||
157 | const struct nfs4_fs_location *location = &locations->locations[loc]; | 186 | const struct nfs4_fs_location *location = &locations->locations[loc]; |
158 | char *mnt_path; | ||
159 | 187 | ||
160 | if (location == NULL || location->nservers <= 0 || | 188 | if (location == NULL || location->nservers <= 0 || |
161 | location->rootpath.ncomponents == 0) { | 189 | location->rootpath.ncomponents == 0) |
162 | loc++; | ||
163 | continue; | 190 | continue; |
164 | } | ||
165 | 191 | ||
166 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); | 192 | mnt = try_location(&mountdata, page, page2, location); |
167 | if (IS_ERR(mnt_path)) { | 193 | if (!IS_ERR(mnt)) |
168 | loc++; | 194 | break; |
169 | continue; | ||
170 | } | ||
171 | mountdata.mnt_path = mnt_path; | ||
172 | |||
173 | s = 0; | ||
174 | while (s < location->nservers) { | ||
175 | struct sockaddr_in addr = { | ||
176 | .sin_family = AF_INET, | ||
177 | .sin_port = htons(NFS_PORT), | ||
178 | }; | ||
179 | |||
180 | if (location->servers[s].len <= 0 || | ||
181 | valid_ipaddr4(location->servers[s].data) < 0) { | ||
182 | s++; | ||
183 | continue; | ||
184 | } | ||
185 | |||
186 | mountdata.hostname = location->servers[s].data; | ||
187 | addr.sin_addr.s_addr = in_aton(mountdata.hostname), | ||
188 | mountdata.addr = (struct sockaddr *)&addr; | ||
189 | mountdata.addrlen = sizeof(addr); | ||
190 | |||
191 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
192 | mountdata.hostname, | ||
193 | mountdata.mnt_path); | ||
194 | |||
195 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); | ||
196 | if (!IS_ERR(mnt)) { | ||
197 | break; | ||
198 | } | ||
199 | s++; | ||
200 | } | ||
201 | loc++; | ||
202 | } | 195 | } |
203 | 196 | ||
204 | out: | 197 | out: |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4dbb84df1b68..193465210d7c 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
65 | 65 | ||
66 | dprintk("%s: call getattr\n", __func__); | 66 | dprintk("%s: call getattr\n", __func__); |
67 | nfs_fattr_init(fattr); | 67 | nfs_fattr_init(fattr); |
68 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 68 | status = rpc_call_sync(server->client, &msg, 0); |
69 | /* Retry with default authentication if different */ | ||
70 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
71 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
69 | dprintk("%s: reply getattr: %d\n", __func__, status); | 72 | dprintk("%s: reply getattr: %d\n", __func__, status); |
70 | if (status) | 73 | if (status) |
71 | return status; | 74 | return status; |
72 | dprintk("%s: call statfs\n", __func__); | 75 | dprintk("%s: call statfs\n", __func__); |
73 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; | 76 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; |
74 | msg.rpc_resp = &fsinfo; | 77 | msg.rpc_resp = &fsinfo; |
75 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 78 | status = rpc_call_sync(server->client, &msg, 0); |
79 | /* Retry with default authentication if different */ | ||
80 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
81 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
76 | dprintk("%s: reply statfs: %d\n", __func__, status); | 82 | dprintk("%s: reply statfs: %d\n", __func__, status); |
77 | if (status) | 83 | if (status) |
78 | return status; | 84 | return status; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ffb697416cb1..8b28b95c9e44 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -91,6 +91,7 @@ enum { | |||
91 | /* Mount options that take string arguments */ | 91 | /* Mount options that take string arguments */ |
92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
94 | Opt_lookupcache, | ||
94 | 95 | ||
95 | /* Special mount options */ | 96 | /* Special mount options */ |
96 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 97 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
@@ -154,6 +155,8 @@ static const match_table_t nfs_mount_option_tokens = { | |||
154 | { Opt_mounthost, "mounthost=%s" }, | 155 | { Opt_mounthost, "mounthost=%s" }, |
155 | { Opt_mountaddr, "mountaddr=%s" }, | 156 | { Opt_mountaddr, "mountaddr=%s" }, |
156 | 157 | ||
158 | { Opt_lookupcache, "lookupcache=%s" }, | ||
159 | |||
157 | { Opt_err, NULL } | 160 | { Opt_err, NULL } |
158 | }; | 161 | }; |
159 | 162 | ||
@@ -200,6 +203,22 @@ static const match_table_t nfs_secflavor_tokens = { | |||
200 | { Opt_sec_err, NULL } | 203 | { Opt_sec_err, NULL } |
201 | }; | 204 | }; |
202 | 205 | ||
206 | enum { | ||
207 | Opt_lookupcache_all, Opt_lookupcache_positive, | ||
208 | Opt_lookupcache_none, | ||
209 | |||
210 | Opt_lookupcache_err | ||
211 | }; | ||
212 | |||
213 | static match_table_t nfs_lookupcache_tokens = { | ||
214 | { Opt_lookupcache_all, "all" }, | ||
215 | { Opt_lookupcache_positive, "pos" }, | ||
216 | { Opt_lookupcache_positive, "positive" }, | ||
217 | { Opt_lookupcache_none, "none" }, | ||
218 | |||
219 | { Opt_lookupcache_err, NULL } | ||
220 | }; | ||
221 | |||
203 | 222 | ||
204 | static void nfs_umount_begin(struct super_block *); | 223 | static void nfs_umount_begin(struct super_block *); |
205 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 224 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
@@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru | |||
209 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, | 228 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, |
210 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 229 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
211 | static void nfs_kill_super(struct super_block *); | 230 | static void nfs_kill_super(struct super_block *); |
212 | static void nfs_put_super(struct super_block *); | ||
213 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | 231 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); |
214 | 232 | ||
215 | static struct file_system_type nfs_fs_type = { | 233 | static struct file_system_type nfs_fs_type = { |
@@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = { | |||
232 | .alloc_inode = nfs_alloc_inode, | 250 | .alloc_inode = nfs_alloc_inode, |
233 | .destroy_inode = nfs_destroy_inode, | 251 | .destroy_inode = nfs_destroy_inode, |
234 | .write_inode = nfs_write_inode, | 252 | .write_inode = nfs_write_inode, |
235 | .put_super = nfs_put_super, | ||
236 | .statfs = nfs_statfs, | 253 | .statfs = nfs_statfs, |
237 | .clear_inode = nfs_clear_inode, | 254 | .clear_inode = nfs_clear_inode, |
238 | .umount_begin = nfs_umount_begin, | 255 | .umount_begin = nfs_umount_begin, |
@@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void) | |||
337 | unregister_filesystem(&nfs_fs_type); | 354 | unregister_filesystem(&nfs_fs_type); |
338 | } | 355 | } |
339 | 356 | ||
340 | void nfs_sb_active(struct nfs_server *server) | 357 | void nfs_sb_active(struct super_block *sb) |
341 | { | 358 | { |
342 | atomic_inc(&server->active); | 359 | struct nfs_server *server = NFS_SB(sb); |
343 | } | ||
344 | 360 | ||
345 | void nfs_sb_deactive(struct nfs_server *server) | 361 | if (atomic_inc_return(&server->active) == 1) |
346 | { | 362 | atomic_inc(&sb->s_active); |
347 | if (atomic_dec_and_test(&server->active)) | ||
348 | wake_up(&server->active_wq); | ||
349 | } | 363 | } |
350 | 364 | ||
351 | static void nfs_put_super(struct super_block *sb) | 365 | void nfs_sb_deactive(struct super_block *sb) |
352 | { | 366 | { |
353 | struct nfs_server *server = NFS_SB(sb); | 367 | struct nfs_server *server = NFS_SB(sb); |
354 | /* | 368 | |
355 | * Make sure there are no outstanding ops to this server. | 369 | if (atomic_dec_and_test(&server->active)) |
356 | * If so, wait for them to finish before allowing the | 370 | deactivate_super(sb); |
357 | * unmount to continue. | ||
358 | */ | ||
359 | wait_event(server->active_wq, atomic_read(&server->active) == 0); | ||
360 | } | 371 | } |
361 | 372 | ||
362 | /* | 373 | /* |
@@ -664,25 +675,6 @@ static void nfs_umount_begin(struct super_block *sb) | |||
664 | } | 675 | } |
665 | 676 | ||
666 | /* | 677 | /* |
667 | * Set the port number in an address. Be agnostic about the address family. | ||
668 | */ | ||
669 | static void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
670 | { | ||
671 | switch (sap->sa_family) { | ||
672 | case AF_INET: { | ||
673 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
674 | ap->sin_port = htons(port); | ||
675 | break; | ||
676 | } | ||
677 | case AF_INET6: { | ||
678 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
679 | ap->sin6_port = htons(port); | ||
680 | break; | ||
681 | } | ||
682 | } | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Sanity-check a server address provided by the mount command. | 678 | * Sanity-check a server address provided by the mount command. |
687 | * | 679 | * |
688 | * Address family must be initialized, and address must not be | 680 | * Address family must be initialized, and address must not be |
@@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, | |||
724 | *addr_len = 0; | 716 | *addr_len = 0; |
725 | } | 717 | } |
726 | 718 | ||
727 | #define IPV6_SCOPE_DELIMITER '%' | ||
728 | |||
729 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 719 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
730 | static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | 720 | static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, |
731 | const char *delim, | 721 | const char *delim, |
732 | struct sockaddr_in6 *sin6) | 722 | struct sockaddr_in6 *sin6) |
733 | { | 723 | { |
734 | char *p; | 724 | char *p; |
735 | size_t len; | 725 | size_t len; |
736 | 726 | ||
737 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | 727 | if ((string + str_len) == delim) |
738 | return ; | 728 | return 1; |
729 | |||
739 | if (*delim != IPV6_SCOPE_DELIMITER) | 730 | if (*delim != IPV6_SCOPE_DELIMITER) |
740 | return; | 731 | return 0; |
732 | |||
733 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
734 | return 0; | ||
741 | 735 | ||
742 | len = (string + str_len) - delim - 1; | 736 | len = (string + str_len) - delim - 1; |
743 | p = kstrndup(delim + 1, len, GFP_KERNEL); | 737 | p = kstrndup(delim + 1, len, GFP_KERNEL); |
@@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | |||
750 | scope_id = dev->ifindex; | 744 | scope_id = dev->ifindex; |
751 | dev_put(dev); | 745 | dev_put(dev); |
752 | } else { | 746 | } else { |
753 | /* scope_id is set to zero on error */ | 747 | if (strict_strtoul(p, 10, &scope_id) == 0) { |
754 | strict_strtoul(p, 10, &scope_id); | 748 | kfree(p); |
749 | return 0; | ||
750 | } | ||
755 | } | 751 | } |
756 | 752 | ||
757 | kfree(p); | 753 | kfree(p); |
754 | |||
758 | sin6->sin6_scope_id = scope_id; | 755 | sin6->sin6_scope_id = scope_id; |
759 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | 756 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); |
757 | return 1; | ||
760 | } | 758 | } |
759 | |||
760 | return 0; | ||
761 | } | 761 | } |
762 | 762 | ||
763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, |
@@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
773 | 773 | ||
774 | sin6->sin6_family = AF_INET6; | 774 | sin6->sin6_family = AF_INET6; |
775 | *addr_len = sizeof(*sin6); | 775 | *addr_len = sizeof(*sin6); |
776 | if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { | 776 | if (in6_pton(string, str_len, addr, |
777 | nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); | 777 | IPV6_SCOPE_DELIMITER, &delim) != 0) { |
778 | return; | 778 | if (nfs_parse_ipv6_scope_id(string, str_len, |
779 | delim, sin6) != 0) | ||
780 | return; | ||
779 | } | 781 | } |
780 | } | 782 | } |
781 | 783 | ||
@@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
798 | * If there is a problem constructing the new sockaddr, set the address | 800 | * If there is a problem constructing the new sockaddr, set the address |
799 | * family to AF_UNSPEC. | 801 | * family to AF_UNSPEC. |
800 | */ | 802 | */ |
801 | static void nfs_parse_ip_address(char *string, size_t str_len, | 803 | void nfs_parse_ip_address(char *string, size_t str_len, |
802 | struct sockaddr *sap, size_t *addr_len) | 804 | struct sockaddr *sap, size_t *addr_len) |
803 | { | 805 | { |
804 | unsigned int i, colons; | 806 | unsigned int i, colons; |
@@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw, | |||
1258 | &mnt->mount_server.addrlen); | 1260 | &mnt->mount_server.addrlen); |
1259 | kfree(string); | 1261 | kfree(string); |
1260 | break; | 1262 | break; |
1263 | case Opt_lookupcache: | ||
1264 | string = match_strdup(args); | ||
1265 | if (string == NULL) | ||
1266 | goto out_nomem; | ||
1267 | token = match_token(string, | ||
1268 | nfs_lookupcache_tokens, args); | ||
1269 | kfree(string); | ||
1270 | switch (token) { | ||
1271 | case Opt_lookupcache_all: | ||
1272 | mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); | ||
1273 | break; | ||
1274 | case Opt_lookupcache_positive: | ||
1275 | mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
1276 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; | ||
1277 | break; | ||
1278 | case Opt_lookupcache_none: | ||
1279 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
1280 | break; | ||
1281 | default: | ||
1282 | errors++; | ||
1283 | dfprintk(MOUNT, "NFS: invalid " | ||
1284 | "lookupcache argument\n"); | ||
1285 | }; | ||
1286 | break; | ||
1261 | 1287 | ||
1262 | /* | 1288 | /* |
1263 | * Special options | 1289 | * Special options |
@@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options, | |||
1558 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | 1584 | * Translate to nfs_parsed_mount_data, which nfs_fill_super |
1559 | * can deal with. | 1585 | * can deal with. |
1560 | */ | 1586 | */ |
1561 | args->flags = data->flags; | 1587 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; |
1562 | args->rsize = data->rsize; | 1588 | args->rsize = data->rsize; |
1563 | args->wsize = data->wsize; | 1589 | args->wsize = data->wsize; |
1564 | args->timeo = data->timeo; | 1590 | args->timeo = data->timeo; |
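Note on the lookupcache hunk above: the three keywords map onto the two new internal flag bits defined later in nfs_mount.h (0x10000/0x20000), which sit above NFS_MOUNT_FLAGMASK; that is also why the legacy binary mount path now masks data->flags with NFS_MOUNT_FLAGMASK while the text parser keeps the extra bits. A minimal standalone sketch of the bit logic (the helper name and the keyword strings are illustrative, not taken from the patch):

	#include <string.h>

	#define NFS_MOUNT_LOOKUP_CACHE_NONEG	0x10000
	#define NFS_MOUNT_LOOKUP_CACHE_NONE	0x20000

	/* Hypothetical helper: apply one lookupcache= keyword to the mount flags. */
	static unsigned int apply_lookupcache(unsigned int flags, const char *arg)
	{
		if (strcmp(arg, "all") == 0)
			flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG |
				   NFS_MOUNT_LOOKUP_CACHE_NONE);
		else if (strcmp(arg, "pos") == 0 || strcmp(arg, "positive") == 0) {
			flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
			flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
		} else if (strcmp(arg, "none") == 0)
			flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG |
				 NFS_MOUNT_LOOKUP_CACHE_NONE;
		return flags;
	}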
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index f089e5839d7d..ecc295347775 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) | |||
99 | 99 | ||
100 | nfs_dec_sillycount(data->dir); | 100 | nfs_dec_sillycount(data->dir); |
101 | nfs_free_unlinkdata(data); | 101 | nfs_free_unlinkdata(data); |
102 | nfs_sb_deactive(NFS_SB(sb)); | 102 | nfs_sb_deactive(sb); |
103 | } | 103 | } |
104 | 104 | ||
105 | static const struct rpc_call_ops nfs_unlink_ops = { | 105 | static const struct rpc_call_ops nfs_unlink_ops = { |
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
118 | .rpc_message = &msg, | 118 | .rpc_message = &msg, |
119 | .callback_ops = &nfs_unlink_ops, | 119 | .callback_ops = &nfs_unlink_ops, |
120 | .callback_data = data, | 120 | .callback_data = data, |
121 | .workqueue = nfsiod_workqueue, | ||
121 | .flags = RPC_TASK_ASYNC, | 122 | .flags = RPC_TASK_ASYNC, |
122 | }; | 123 | }; |
123 | struct rpc_task *task; | 124 | struct rpc_task *task; |
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
149 | nfs_dec_sillycount(dir); | 150 | nfs_dec_sillycount(dir); |
150 | return 0; | 151 | return 0; |
151 | } | 152 | } |
152 | nfs_sb_active(NFS_SERVER(dir)); | 153 | nfs_sb_active(dir->i_sb); |
153 | data->args.fh = NFS_FH(dir); | 154 | data->args.fh = NFS_FH(dir); |
154 | nfs_fattr_init(&data->res.dir_attr); | 155 | nfs_fattr_init(&data->res.dir_attr); |
155 | 156 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3229e217c773..9f9845859fc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
1427 | .bdi = mapping->backing_dev_info, | 1427 | .bdi = mapping->backing_dev_info, |
1428 | .sync_mode = WB_SYNC_NONE, | 1428 | .sync_mode = WB_SYNC_NONE, |
1429 | .nr_to_write = LONG_MAX, | 1429 | .nr_to_write = LONG_MAX, |
1430 | .range_start = 0, | ||
1431 | .range_end = LLONG_MAX, | ||
1430 | .for_writepages = 1, | 1432 | .for_writepages = 1, |
1431 | .range_cyclic = 1, | ||
1432 | }; | 1433 | }; |
1433 | int ret; | 1434 | int ret; |
1434 | 1435 | ||
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 78a5922a2f11..ac8d0233b05c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -137,7 +137,7 @@ struct nfs_inode { | |||
137 | unsigned long attrtimeo_timestamp; | 137 | unsigned long attrtimeo_timestamp; |
138 | __u64 change_attr; /* v4 only */ | 138 | __u64 change_attr; /* v4 only */ |
139 | 139 | ||
140 | unsigned long last_updated; | 140 | unsigned long attr_gencount; |
141 | /* "Generation counter" for the attribute cache. This is | 141 | /* "Generation counter" for the attribute cache. This is |
142 | * bumped whenever we update the metadata on the | 142 | * bumped whenever we update the metadata on the |
143 | * server. | 143 | * server. |
@@ -200,11 +200,10 @@ struct nfs_inode { | |||
200 | /* | 200 | /* |
201 | * Bit offsets in flags field | 201 | * Bit offsets in flags field |
202 | */ | 202 | */ |
203 | #define NFS_INO_REVALIDATING (0) /* revalidating attrs */ | 203 | #define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ |
204 | #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ | 204 | #define NFS_INO_STALE (1) /* possible stale inode */ |
205 | #define NFS_INO_STALE (2) /* possible stale inode */ | 205 | #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ |
206 | #define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ | 206 | #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ |
207 | #define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ | ||
208 | 207 | ||
209 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 208 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
210 | { | 209 | { |
@@ -345,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct | |||
345 | extern void put_nfs_open_context(struct nfs_open_context *ctx); | 344 | extern void put_nfs_open_context(struct nfs_open_context *ctx); |
346 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); | 345 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); |
347 | extern u64 nfs_compat_user_ino64(u64 fileid); | 346 | extern u64 nfs_compat_user_ino64(u64 fileid); |
347 | extern void nfs_fattr_init(struct nfs_fattr *fattr); | ||
348 | 348 | ||
349 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ | 349 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ |
350 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ | 350 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ |
351 | 351 | extern unsigned long nfs_inc_attr_generation_counter(void); | |
352 | static inline void nfs_fattr_init(struct nfs_fattr *fattr) | ||
353 | { | ||
354 | fattr->valid = 0; | ||
355 | fattr->time_start = jiffies; | ||
356 | } | ||
357 | 352 | ||
358 | /* | 353 | /* |
359 | * linux/fs/nfs/file.c | 354 | * linux/fs/nfs/file.c |
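The hunk above drops the static-inline nfs_fattr_init() from the header and re-declares it as an exported function, alongside the new attr_gencount field and nfs_inc_attr_generation_counter(). The out-of-line body is not part of this section; a hedged sketch of what it presumably does, assuming it keeps the old inline behaviour and additionally stamps the new per-fattr generation count:

	/* Sketch only -- the real definition lives in fs/nfs/inode.c and is
	 * not shown in this diff. */
	void nfs_fattr_init(struct nfs_fattr *fattr)
	{
		fattr->valid = 0;
		fattr->time_start = jiffies;
		fattr->gencount = nfs_inc_attr_generation_counter();
	}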
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c9beacd16c00..4e477ae58699 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -119,7 +119,6 @@ struct nfs_server { | |||
119 | void (*destroy)(struct nfs_server *); | 119 | void (*destroy)(struct nfs_server *); |
120 | 120 | ||
121 | atomic_t active; /* Keep trace of any activity to this server */ | 121 | atomic_t active; /* Keep trace of any activity to this server */ |
122 | wait_queue_head_t active_wq; /* Wait for any activity to stop */ | ||
123 | 122 | ||
124 | /* mountd-related mount options */ | 123 | /* mountd-related mount options */ |
125 | struct sockaddr_storage mountd_address; | 124 | struct sockaddr_storage mountd_address; |
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index df7c6b7a7ebb..6549a06ac16e 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h | |||
@@ -65,4 +65,8 @@ struct nfs_mount_data { | |||
65 | #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ | 65 | #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ |
66 | #define NFS_MOUNT_FLAGMASK 0xFFFF | 66 | #define NFS_MOUNT_FLAGMASK 0xFFFF |
67 | 67 | ||
68 | /* The following are for internal use only */ | ||
69 | #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 | ||
70 | #define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 | ||
71 | |||
68 | #endif | 72 | #endif |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8c77c11224d1..c1c31acb8a2b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -36,6 +36,7 @@ struct nfs_fattr { | |||
36 | __u32 nlink; | 36 | __u32 nlink; |
37 | __u32 uid; | 37 | __u32 uid; |
38 | __u32 gid; | 38 | __u32 gid; |
39 | dev_t rdev; | ||
39 | __u64 size; | 40 | __u64 size; |
40 | union { | 41 | union { |
41 | struct { | 42 | struct { |
@@ -46,7 +47,6 @@ struct nfs_fattr { | |||
46 | __u64 used; | 47 | __u64 used; |
47 | } nfs3; | 48 | } nfs3; |
48 | } du; | 49 | } du; |
49 | dev_t rdev; | ||
50 | struct nfs_fsid fsid; | 50 | struct nfs_fsid fsid; |
51 | __u64 fileid; | 51 | __u64 fileid; |
52 | struct timespec atime; | 52 | struct timespec atime; |
@@ -56,6 +56,7 @@ struct nfs_fattr { | |||
56 | __u64 change_attr; /* NFSv4 change attribute */ | 56 | __u64 change_attr; /* NFSv4 change attribute */ |
57 | __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ | 57 | __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ |
58 | unsigned long time_start; | 58 | unsigned long time_start; |
59 | unsigned long gencount; | ||
59 | }; | 60 | }; |
60 | 61 | ||
61 | #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ | 62 | #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ |
@@ -672,16 +673,16 @@ struct nfs4_rename_res { | |||
672 | struct nfs_fattr * new_fattr; | 673 | struct nfs_fattr * new_fattr; |
673 | }; | 674 | }; |
674 | 675 | ||
675 | #define NFS4_SETCLIENTID_NAMELEN (56) | 676 | #define NFS4_SETCLIENTID_NAMELEN (127) |
676 | struct nfs4_setclientid { | 677 | struct nfs4_setclientid { |
677 | const nfs4_verifier * sc_verifier; | 678 | const nfs4_verifier * sc_verifier; |
678 | unsigned int sc_name_len; | 679 | unsigned int sc_name_len; |
679 | char sc_name[NFS4_SETCLIENTID_NAMELEN]; | 680 | char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; |
680 | u32 sc_prog; | 681 | u32 sc_prog; |
681 | unsigned int sc_netid_len; | 682 | unsigned int sc_netid_len; |
682 | char sc_netid[RPCBIND_MAXNETIDLEN]; | 683 | char sc_netid[RPCBIND_MAXNETIDLEN + 1]; |
683 | unsigned int sc_uaddr_len; | 684 | unsigned int sc_uaddr_len; |
684 | char sc_uaddr[RPCBIND_MAXUADDRLEN]; | 685 | char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; |
685 | u32 sc_cb_ident; | 686 | u32 sc_cb_ident; |
686 | }; | 687 | }; |
687 | 688 | ||
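The "+ 1" on each array in nfs4_setclientid above leaves room for a terminating NUL, so the strings can be built with bounded, NUL-terminating formatting whose return value feeds the matching sc_*_len field. Purely illustrative fragment (the NFSv4 caller is not in this section, and the pointer name and format arguments below are assumptions):

	/* Hypothetical usage: scnprintf() always NUL-terminates and returns
	 * the number of characters actually written. */
	sc->sc_name_len = scnprintf(sc->sc_name, sizeof(sc->sc_name),
				    "%s/%s %s %u", ipaddr, srvaddr, proto, port);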
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 4de56b1d372b..54a379c9e8eb 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -66,9 +66,6 @@ | |||
66 | 66 | ||
67 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ | 67 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ |
68 | 68 | ||
69 | #define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ | ||
70 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
71 | |||
72 | /* memory registration strategies */ | 69 | /* memory registration strategies */ |
73 | #define RPCRDMA_PERSISTENT_REGISTRATION (1) | 70 | #define RPCRDMA_PERSISTENT_REGISTRATION (1) |
74 | 71 | ||
@@ -78,6 +75,7 @@ enum rpcrdma_memreg { | |||
78 | RPCRDMA_MEMWINDOWS, | 75 | RPCRDMA_MEMWINDOWS, |
79 | RPCRDMA_MEMWINDOWS_ASYNC, | 76 | RPCRDMA_MEMWINDOWS_ASYNC, |
80 | RPCRDMA_MTHCAFMR, | 77 | RPCRDMA_MTHCAFMR, |
78 | RPCRDMA_FRMR, | ||
81 | RPCRDMA_ALLPHYSICAL, | 79 | RPCRDMA_ALLPHYSICAL, |
82 | RPCRDMA_LAST | 80 | RPCRDMA_LAST |
83 | }; | 81 | }; |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index da0789fa1b88..4895c341e46d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru | |||
213 | } | 213 | } |
214 | 214 | ||
215 | /* save the nodename */ | 215 | /* save the nodename */ |
216 | clnt->cl_nodelen = strlen(utsname()->nodename); | 216 | clnt->cl_nodelen = strlen(init_utsname()->nodename); |
217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) | 217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) |
218 | clnt->cl_nodelen = UNX_MAXNODENAME; | 218 | clnt->cl_nodelen = UNX_MAXNODENAME; |
219 | memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); | 219 | memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); |
220 | rpc_register_client(clnt); | 220 | rpc_register_client(clnt); |
221 | return clnt; | 221 | return clnt; |
222 | 222 | ||
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 34abc91058d8..41013dd66ac3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
460 | return rpc_run_task(&task_setup_data); | 460 | return rpc_run_task(&task_setup_data); |
461 | } | 461 | } |
462 | 462 | ||
463 | /* | ||
464 | * In the case where rpc clients have been cloned, we want to make | ||
465 | * sure that we use the program number/version etc of the actual | ||
466 | * owner of the xprt. To do so, we walk back up the tree of parents | ||
467 | * to find whoever created the transport and/or whoever has the | ||
468 | * autobind flag set. | ||
469 | */ | ||
470 | static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) | ||
471 | { | ||
472 | struct rpc_clnt *parent = clnt->cl_parent; | ||
473 | |||
474 | while (parent != clnt) { | ||
475 | if (parent->cl_xprt != clnt->cl_xprt) | ||
476 | break; | ||
477 | if (clnt->cl_autobind) | ||
478 | break; | ||
479 | clnt = parent; | ||
480 | parent = parent->cl_parent; | ||
481 | } | ||
482 | return clnt; | ||
483 | } | ||
484 | |||
463 | /** | 485 | /** |
464 | * rpcb_getport_async - obtain the port for a given RPC service on a given host | 486 | * rpcb_getport_async - obtain the port for a given RPC service on a given host |
465 | * @task: task that is waiting for portmapper request | 487 | * @task: task that is waiting for portmapper request |
@@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
469 | */ | 491 | */ |
470 | void rpcb_getport_async(struct rpc_task *task) | 492 | void rpcb_getport_async(struct rpc_task *task) |
471 | { | 493 | { |
472 | struct rpc_clnt *clnt = task->tk_client; | 494 | struct rpc_clnt *clnt; |
473 | struct rpc_procinfo *proc; | 495 | struct rpc_procinfo *proc; |
474 | u32 bind_version; | 496 | u32 bind_version; |
475 | struct rpc_xprt *xprt = task->tk_xprt; | 497 | struct rpc_xprt *xprt; |
476 | struct rpc_clnt *rpcb_clnt; | 498 | struct rpc_clnt *rpcb_clnt; |
477 | static struct rpcbind_args *map; | 499 | static struct rpcbind_args *map; |
478 | struct rpc_task *child; | 500 | struct rpc_task *child; |
@@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
481 | size_t salen; | 503 | size_t salen; |
482 | int status; | 504 | int status; |
483 | 505 | ||
506 | clnt = rpcb_find_transport_owner(task->tk_client); | ||
507 | xprt = clnt->cl_xprt; | ||
508 | |||
484 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", | 509 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", |
485 | task->tk_pid, __func__, | 510 | task->tk_pid, __func__, |
486 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); | 511 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); |
487 | 512 | ||
488 | /* Autobind on cloned rpc clients is discouraged */ | ||
489 | BUG_ON(clnt->cl_parent != clnt); | ||
490 | |||
491 | /* Put self on the wait queue to ensure we get notified if | 513 | /* Put self on the wait queue to ensure we get notified if |
492 | * some other task is already attempting to bind the port */ | 514 | * some other task is already attempting to bind the port */ |
493 | rpc_sleep_on(&xprt->binding, task, NULL); | 515 | rpc_sleep_on(&xprt->binding, task, NULL); |
@@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
549 | status = -ENOMEM; | 571 | status = -ENOMEM; |
550 | dprintk("RPC: %5u %s: no memory available\n", | 572 | dprintk("RPC: %5u %s: no memory available\n", |
551 | task->tk_pid, __func__); | 573 | task->tk_pid, __func__); |
552 | goto bailout_nofree; | 574 | goto bailout_release_client; |
553 | } | 575 | } |
554 | map->r_prog = clnt->cl_prog; | 576 | map->r_prog = clnt->cl_prog; |
555 | map->r_vers = clnt->cl_vers; | 577 | map->r_vers = clnt->cl_vers; |
@@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
569 | task->tk_pid, __func__); | 591 | task->tk_pid, __func__); |
570 | return; | 592 | return; |
571 | } | 593 | } |
572 | rpc_put_task(child); | ||
573 | 594 | ||
574 | task->tk_xprt->stat.bind_count++; | 595 | xprt->stat.bind_count++; |
596 | rpc_put_task(child); | ||
575 | return; | 597 | return; |
576 | 598 | ||
599 | bailout_release_client: | ||
600 | rpc_release_client(rpcb_clnt); | ||
577 | bailout_nofree: | 601 | bailout_nofree: |
578 | rpcb_wake_rpcbind_waiters(xprt, status); | 602 | rpcb_wake_rpcbind_waiters(xprt, status); |
579 | task->tk_status = status; | 603 | task->tk_status = status; |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe332..29e401bb612e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) | |||
108 | goto out; | 108 | goto out; |
109 | } | 109 | } |
110 | 110 | ||
111 | result = -EINVAL; | 111 | list_add_tail(&transport->list, &xprt_list); |
112 | if (try_module_get(THIS_MODULE)) { | 112 | printk(KERN_INFO "RPC: Registered %s transport module.\n", |
113 | list_add_tail(&transport->list, &xprt_list); | 113 | transport->name); |
114 | printk(KERN_INFO "RPC: Registered %s transport module.\n", | 114 | result = 0; |
115 | transport->name); | ||
116 | result = 0; | ||
117 | } | ||
118 | 115 | ||
119 | out: | 116 | out: |
120 | spin_unlock(&xprt_list_lock); | 117 | spin_unlock(&xprt_list_lock); |
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) | |||
143 | "RPC: Unregistered %s transport module.\n", | 140 | "RPC: Unregistered %s transport module.\n", |
144 | transport->name); | 141 | transport->name); |
145 | list_del_init(&transport->list); | 142 | list_del_init(&transport->list); |
146 | module_put(THIS_MODULE); | ||
147 | goto out; | 143 | goto out; |
148 | } | 144 | } |
149 | } | 145 | } |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5c1954d28d09..14106d26bb95 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
118 | } | 118 | } |
119 | 119 | ||
120 | if (xdrbuf->tail[0].iov_len) { | 120 | if (xdrbuf->tail[0].iov_len) { |
121 | /* the rpcrdma protocol allows us to omit any trailing | ||
122 | * xdr pad bytes, saving the server an RDMA operation. */ | ||
123 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | ||
124 | return n; | ||
121 | if (n == nsegs) | 125 | if (n == nsegs) |
122 | return 0; | 126 | return 0; |
123 | seg[n].mr_page = NULL; | 127 | seg[n].mr_page = NULL; |
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
508 | if (hdrlen == 0) | 512 | if (hdrlen == 0) |
509 | return -1; | 513 | return -1; |
510 | 514 | ||
511 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" | 515 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
512 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 516 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
513 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, | 517 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
514 | headerp, base, req->rl_iov.lkey); | 518 | headerp, base, req->rl_iov.lkey); |
515 | 519 | ||
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b | |||
594 | * Scatter inline received data back into provided iov's. | 598 | * Scatter inline received data back into provided iov's. |
595 | */ | 599 | */ |
596 | static void | 600 | static void |
597 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | 601 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) |
598 | { | 602 | { |
599 | int i, npages, curlen, olen; | 603 | int i, npages, curlen, olen; |
600 | char *destp; | 604 | char *destp; |
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | |||
660 | } else | 664 | } else |
661 | rqst->rq_rcv_buf.tail[0].iov_len = 0; | 665 | rqst->rq_rcv_buf.tail[0].iov_len = 0; |
662 | 666 | ||
667 | if (pad) { | ||
668 | /* implicit padding on terminal chunk */ | ||
669 | unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; | ||
670 | while (pad--) | ||
671 | p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; | ||
672 | } | ||
673 | |||
663 | if (copy_len) | 674 | if (copy_len) |
664 | dprintk("RPC: %s: %d bytes in" | 675 | dprintk("RPC: %s: %d bytes in" |
665 | " %d extra segments (%d lost)\n", | 676 | " %d extra segments (%d lost)\n", |
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
681 | struct rpc_xprt *xprt = ep->rep_xprt; | 692 | struct rpc_xprt *xprt = ep->rep_xprt; |
682 | 693 | ||
683 | spin_lock_bh(&xprt->transport_lock); | 694 | spin_lock_bh(&xprt->transport_lock); |
695 | if (++xprt->connect_cookie == 0) /* maintain a reserved value */ | ||
696 | ++xprt->connect_cookie; | ||
684 | if (ep->rep_connected > 0) { | 697 | if (ep->rep_connected > 0) { |
685 | if (!xprt_test_and_set_connected(xprt)) | 698 | if (!xprt_test_and_set_connected(xprt)) |
686 | xprt_wake_pending_tasks(xprt, 0); | 699 | xprt_wake_pending_tasks(xprt, 0); |
687 | } else { | 700 | } else { |
688 | if (xprt_test_and_clear_connected(xprt)) | 701 | if (xprt_test_and_clear_connected(xprt)) |
689 | xprt_wake_pending_tasks(xprt, ep->rep_connected); | 702 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
690 | } | 703 | } |
691 | spin_unlock_bh(&xprt->transport_lock); | 704 | spin_unlock_bh(&xprt->transport_lock); |
692 | } | 705 | } |
@@ -792,14 +805,20 @@ repost: | |||
792 | ((unsigned char *)iptr - (unsigned char *)headerp); | 805 | ((unsigned char *)iptr - (unsigned char *)headerp); |
793 | status = rep->rr_len + rdmalen; | 806 | status = rep->rr_len + rdmalen; |
794 | r_xprt->rx_stats.total_rdma_reply += rdmalen; | 807 | r_xprt->rx_stats.total_rdma_reply += rdmalen; |
808 | /* special case - last chunk may omit padding */ | ||
809 | if (rdmalen &= 3) { | ||
810 | rdmalen = 4 - rdmalen; | ||
811 | status += rdmalen; | ||
812 | } | ||
795 | } else { | 813 | } else { |
796 | /* else ordinary inline */ | 814 | /* else ordinary inline */ |
815 | rdmalen = 0; | ||
797 | iptr = (__be32 *)((unsigned char *)headerp + 28); | 816 | iptr = (__be32 *)((unsigned char *)headerp + 28); |
798 | rep->rr_len -= 28; /*sizeof *headerp;*/ | 817 | rep->rr_len -= 28; /*sizeof *headerp;*/ |
799 | status = rep->rr_len; | 818 | status = rep->rr_len; |
800 | } | 819 | } |
801 | /* Fix up the rpc results for upper layer */ | 820 | /* Fix up the rpc results for upper layer */ |
802 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); | 821 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); |
803 | break; | 822 | break; |
804 | 823 | ||
805 | case htonl(RDMA_NOMSG): | 824 | case htonl(RDMA_NOMSG): |
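Both rpc_rdma.c hunks above lean on the XDR rule that lengths are rounded up to 4-byte multiples: on the send side the client may omit the trailing pad of a write chunk, and on the reply side it reconstructs the pad the server was allowed to leave out ("rdmalen &= 3; rdmalen = 4 - rdmalen", then zero-filled in rpcrdma_inline_fixup()). A minimal sketch of that round-up arithmetic (the helper name is illustrative):

	/* Pad bytes owed by a chunk of 'len' bytes to reach the next XDR
	 * 4-byte boundary: 0 for multiples of 4, otherwise 4 - (len % 4). */
	static inline unsigned int xdr_pad_bytes(unsigned int len)
	{
		return (4 - (len & 3)) & 3;
	}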
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |||
70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
72 | static unsigned int xprt_rdma_inline_write_padding; | 72 | static unsigned int xprt_rdma_inline_write_padding; |
73 | #if !RPCRDMA_PERSISTENT_REGISTRATION | 73 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
74 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ | 74 | int xprt_rdma_pad_optimize = 0; |
75 | #else | ||
76 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; | ||
77 | #endif | ||
78 | 75 | ||
79 | #ifdef RPC_DEBUG | 76 | #ifdef RPC_DEBUG |
80 | 77 | ||
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { | |||
140 | .extra2 = &max_memreg, | 137 | .extra2 = &max_memreg, |
141 | }, | 138 | }, |
142 | { | 139 | { |
140 | .ctl_name = CTL_UNNUMBERED, | ||
141 | .procname = "rdma_pad_optimize", | ||
142 | .data = &xprt_rdma_pad_optimize, | ||
143 | .maxlen = sizeof(unsigned int), | ||
144 | .mode = 0644, | ||
145 | .proc_handler = &proc_dointvec, | ||
146 | }, | ||
147 | { | ||
143 | .ctl_name = 0, | 148 | .ctl_name = 0, |
144 | }, | 149 | }, |
145 | }; | 150 | }; |
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
458 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 463 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
459 | 464 | ||
460 | dprintk("RPC: %s: closing\n", __func__); | 465 | dprintk("RPC: %s: closing\n", __func__); |
466 | if (r_xprt->rx_ep.rep_connected > 0) | ||
467 | xprt->reestablish_timeout = 0; | ||
461 | xprt_disconnect_done(xprt); | 468 | xprt_disconnect_done(xprt); |
462 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 469 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
463 | } | 470 | } |
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) | |||
485 | /* Reconnect */ | 492 | /* Reconnect */ |
486 | schedule_delayed_work(&r_xprt->rdma_connect, | 493 | schedule_delayed_work(&r_xprt->rdma_connect, |
487 | xprt->reestablish_timeout); | 494 | xprt->reestablish_timeout); |
495 | xprt->reestablish_timeout <<= 1; | ||
496 | if (xprt->reestablish_timeout > (30 * HZ)) | ||
497 | xprt->reestablish_timeout = (30 * HZ); | ||
498 | else if (xprt->reestablish_timeout < (5 * HZ)) | ||
499 | xprt->reestablish_timeout = (5 * HZ); | ||
488 | } else { | 500 | } else { |
489 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 501 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
490 | if (!RPC_IS_ASYNC(task)) | 502 | if (!RPC_IS_ASYNC(task)) |
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
591 | } | 603 | } |
592 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 604 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
593 | out: | 605 | out: |
606 | req->rl_connect_cookie = 0; /* our reserved value */ | ||
594 | return req->rl_xdr_buf; | 607 | return req->rl_xdr_buf; |
595 | 608 | ||
596 | outfail: | 609 | outfail: |
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
694 | req->rl_reply->rr_xprt = xprt; | 707 | req->rl_reply->rr_xprt = xprt; |
695 | } | 708 | } |
696 | 709 | ||
697 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { | 710 | /* Must suppress retransmit to maintain credits */ |
698 | xprt_disconnect_done(xprt); | 711 | if (req->rl_connect_cookie == xprt->connect_cookie) |
699 | return -ENOTCONN; /* implies disconnect */ | 712 | goto drop_connection; |
700 | } | 713 | req->rl_connect_cookie = xprt->connect_cookie; |
714 | |||
715 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
716 | goto drop_connection; | ||
701 | 717 | ||
718 | task->tk_bytes_sent += rqst->rq_snd_buf.len; | ||
702 | rqst->rq_bytes_sent = 0; | 719 | rqst->rq_bytes_sent = 0; |
703 | return 0; | 720 | return 0; |
721 | |||
722 | drop_connection: | ||
723 | xprt_disconnect_done(xprt); | ||
724 | return -ENOTCONN; /* implies disconnect */ | ||
704 | } | 725 | } |
705 | 726 | ||
706 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 727 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) | |||
770 | { | 791 | { |
771 | int rc; | 792 | int rc; |
772 | 793 | ||
773 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | 794 | dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
774 | #ifdef RPC_DEBUG | 795 | #ifdef RPC_DEBUG |
775 | if (sunrpc_table_header) { | 796 | if (sunrpc_table_header) { |
776 | unregister_sysctl_table(sunrpc_table_header); | 797 | unregister_sysctl_table(sunrpc_table_header); |
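Two behavioural notes on the transport.c hunks above: retransmits are suppressed by comparing the request's rl_connect_cookie against the transport's connect_cookie (a resend is only permitted once a reconnect has bumped the cookie), and the reconnect delay doubles on each attempt while being clamped to a 5-30 second window. A standalone sketch of that backoff clamp (the function name is illustrative):

	/* Doubling backoff bounded to [5*HZ, 30*HZ], as in xprt_rdma_connect(). */
	static unsigned long next_reestablish_timeout(unsigned long timeout)
	{
		timeout <<= 1;
		if (timeout > 30 * HZ)
			timeout = 30 * HZ;
		else if (timeout < 5 * HZ)
			timeout = 5 * HZ;
		return timeout;
	}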
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
284 | switch (event->event) { | 284 | switch (event->event) { |
285 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
287 | ia->ri_async_rc = 0; | ||
287 | complete(&ia->ri_done); | 288 | complete(&ia->ri_done); |
288 | break; | 289 | break; |
289 | case RDMA_CM_EVENT_ADDR_ERROR: | 290 | case RDMA_CM_EVENT_ADDR_ERROR: |
@@ -338,13 +339,32 @@ connected: | |||
338 | wake_up_all(&ep->rep_connect_wait); | 339 | wake_up_all(&ep->rep_connect_wait); |
339 | break; | 340 | break; |
340 | default: | 341 | default: |
341 | ia->ri_async_rc = -EINVAL; | 342 | dprintk("RPC: %s: unexpected CM event %d\n", |
342 | dprintk("RPC: %s: unexpected CM event %X\n", | ||
343 | __func__, event->event); | 343 | __func__, event->event); |
344 | complete(&ia->ri_done); | ||
345 | break; | 344 | break; |
346 | } | 345 | } |
347 | 346 | ||
347 | #ifdef RPC_DEBUG | ||
348 | if (connstate == 1) { | ||
349 | int ird = attr.max_dest_rd_atomic; | ||
350 | int tird = ep->rep_remote_cma.responder_resources; | ||
351 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
352 | "on %s, memreg %d slots %d ird %d%s\n", | ||
353 | NIPQUAD(addr->sin_addr.s_addr), | ||
354 | ntohs(addr->sin_port), | ||
355 | ia->ri_id->device->name, | ||
356 | ia->ri_memreg_strategy, | ||
357 | xprt->rx_buf.rb_max_requests, | ||
358 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | ||
359 | } else if (connstate < 0) { | ||
360 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
361 | "closed (%d)\n", | ||
362 | NIPQUAD(addr->sin_addr.s_addr), | ||
363 | ntohs(addr->sin_port), | ||
364 | connstate); | ||
365 | } | ||
366 | #endif | ||
367 | |||
348 | return 0; | 368 | return 0; |
349 | } | 369 | } |
350 | 370 | ||
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
355 | struct rdma_cm_id *id; | 375 | struct rdma_cm_id *id; |
356 | int rc; | 376 | int rc; |
357 | 377 | ||
378 | init_completion(&ia->ri_done); | ||
379 | |||
358 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); | 380 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); |
359 | if (IS_ERR(id)) { | 381 | if (IS_ERR(id)) { |
360 | rc = PTR_ERR(id); | 382 | rc = PTR_ERR(id); |
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
363 | return id; | 385 | return id; |
364 | } | 386 | } |
365 | 387 | ||
366 | ia->ri_async_rc = 0; | 388 | ia->ri_async_rc = -ETIMEDOUT; |
367 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 389 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); |
368 | if (rc) { | 390 | if (rc) { |
369 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 391 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
370 | __func__, rc); | 392 | __func__, rc); |
371 | goto out; | 393 | goto out; |
372 | } | 394 | } |
373 | wait_for_completion(&ia->ri_done); | 395 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
396 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
374 | rc = ia->ri_async_rc; | 397 | rc = ia->ri_async_rc; |
375 | if (rc) | 398 | if (rc) |
376 | goto out; | 399 | goto out; |
377 | 400 | ||
378 | ia->ri_async_rc = 0; | 401 | ia->ri_async_rc = -ETIMEDOUT; |
379 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 402 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
380 | if (rc) { | 403 | if (rc) { |
381 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | 404 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
382 | __func__, rc); | 405 | __func__, rc); |
383 | goto out; | 406 | goto out; |
384 | } | 407 | } |
385 | wait_for_completion(&ia->ri_done); | 408 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
409 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
386 | rc = ia->ri_async_rc; | 410 | rc = ia->ri_async_rc; |
387 | if (rc) | 411 | if (rc) |
388 | goto out; | 412 | goto out; |
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) | |||
423 | int | 447 | int |
424 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 448 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
425 | { | 449 | { |
426 | int rc; | 450 | int rc, mem_priv; |
451 | struct ib_device_attr devattr; | ||
427 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 452 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
428 | 453 | ||
429 | init_completion(&ia->ri_done); | ||
430 | |||
431 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 454 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
432 | if (IS_ERR(ia->ri_id)) { | 455 | if (IS_ERR(ia->ri_id)) { |
433 | rc = PTR_ERR(ia->ri_id); | 456 | rc = PTR_ERR(ia->ri_id); |
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
443 | } | 466 | } |
444 | 467 | ||
445 | /* | 468 | /* |
469 | * Query the device to determine if the requested memory | ||
470 | * registration strategy is supported. If it isn't, set the | ||
471 | * strategy to a globally supported model. | ||
472 | */ | ||
473 | rc = ib_query_device(ia->ri_id->device, &devattr); | ||
474 | if (rc) { | ||
475 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
476 | __func__, rc); | ||
477 | goto out2; | ||
478 | } | ||
479 | |||
480 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | ||
481 | ia->ri_have_dma_lkey = 1; | ||
482 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | ||
483 | } | ||
484 | |||
485 | switch (memreg) { | ||
486 | case RPCRDMA_MEMWINDOWS: | ||
487 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
488 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
489 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
490 | "specified but not supported by adapter, " | ||
491 | "using slower RPCRDMA_REGISTER\n", | ||
492 | __func__); | ||
493 | memreg = RPCRDMA_REGISTER; | ||
494 | } | ||
495 | break; | ||
496 | case RPCRDMA_MTHCAFMR: | ||
497 | if (!ia->ri_id->device->alloc_fmr) { | ||
498 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
499 | dprintk("RPC: %s: MTHCAFMR registration " | ||
500 | "specified but not supported by adapter, " | ||
501 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
502 | __func__); | ||
503 | memreg = RPCRDMA_ALLPHYSICAL; | ||
504 | #else | ||
505 | dprintk("RPC: %s: MTHCAFMR registration " | ||
506 | "specified but not supported by adapter, " | ||
507 | "using slower RPCRDMA_REGISTER\n", | ||
508 | __func__); | ||
509 | memreg = RPCRDMA_REGISTER; | ||
510 | #endif | ||
511 | } | ||
512 | break; | ||
513 | case RPCRDMA_FRMR: | ||
514 | /* Requires both frmr reg and local dma lkey */ | ||
515 | if ((devattr.device_cap_flags & | ||
516 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
517 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
518 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
519 | dprintk("RPC: %s: FRMR registration " | ||
520 | "specified but not supported by adapter, " | ||
521 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
522 | __func__); | ||
523 | memreg = RPCRDMA_ALLPHYSICAL; | ||
524 | #else | ||
525 | dprintk("RPC: %s: FRMR registration " | ||
526 | "specified but not supported by adapter, " | ||
527 | "using slower RPCRDMA_REGISTER\n", | ||
528 | __func__); | ||
529 | memreg = RPCRDMA_REGISTER; | ||
530 | #endif | ||
531 | } | ||
532 | break; | ||
533 | } | ||
534 | |||
535 | /* | ||
446 | * Optionally obtain an underlying physical identity mapping in | 536 | * Optionally obtain an underlying physical identity mapping in |
447 | * order to do a memory window-based bind. This base registration | 537 | * order to do a memory window-based bind. This base registration |
448 | * is protected from remote access - that is enabled only by binding | 538 | * is protected from remote access - that is enabled only by binding |
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
450 | * revoked after the corresponding completion similar to a storage | 540 | * revoked after the corresponding completion similar to a storage |
451 | * adapter. | 541 | * adapter. |
452 | */ | 542 | */ |
453 | if (memreg > RPCRDMA_REGISTER) { | 543 | switch (memreg) { |
454 | int mem_priv = IB_ACCESS_LOCAL_WRITE; | 544 | case RPCRDMA_BOUNCEBUFFERS: |
455 | switch (memreg) { | 545 | case RPCRDMA_REGISTER: |
546 | case RPCRDMA_FRMR: | ||
547 | break; | ||
456 | #if RPCRDMA_PERSISTENT_REGISTRATION | 548 | #if RPCRDMA_PERSISTENT_REGISTRATION |
457 | case RPCRDMA_ALLPHYSICAL: | 549 | case RPCRDMA_ALLPHYSICAL: |
458 | mem_priv |= IB_ACCESS_REMOTE_WRITE; | 550 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
459 | mem_priv |= IB_ACCESS_REMOTE_READ; | 551 | IB_ACCESS_REMOTE_WRITE | |
460 | break; | 552 | IB_ACCESS_REMOTE_READ; |
553 | goto register_setup; | ||
461 | #endif | 554 | #endif |
462 | case RPCRDMA_MEMWINDOWS_ASYNC: | 555 | case RPCRDMA_MEMWINDOWS_ASYNC: |
463 | case RPCRDMA_MEMWINDOWS: | 556 | case RPCRDMA_MEMWINDOWS: |
464 | mem_priv |= IB_ACCESS_MW_BIND; | 557 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
465 | break; | 558 | IB_ACCESS_MW_BIND; |
466 | default: | 559 | goto register_setup; |
560 | case RPCRDMA_MTHCAFMR: | ||
561 | if (ia->ri_have_dma_lkey) | ||
467 | break; | 562 | break; |
468 | } | 563 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
564 | register_setup: | ||
469 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 565 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
470 | if (IS_ERR(ia->ri_bind_mem)) { | 566 | if (IS_ERR(ia->ri_bind_mem)) { |
471 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 567 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
475 | memreg = RPCRDMA_REGISTER; | 571 | memreg = RPCRDMA_REGISTER; |
476 | ia->ri_bind_mem = NULL; | 572 | ia->ri_bind_mem = NULL; |
477 | } | 573 | } |
574 | break; | ||
575 | default: | ||
576 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | ||
577 | __func__, memreg); | ||
578 | rc = -EINVAL; | ||
579 | goto out2; | ||
478 | } | 580 | } |
581 | dprintk("RPC: %s: memory registration strategy is %d\n", | ||
582 | __func__, memreg); | ||
479 | 583 | ||
480 | /* Else will do memory reg/dereg for each chunk */ | 584 | /* Else will do memory reg/dereg for each chunk */ |
481 | ia->ri_memreg_strategy = memreg; | 585 | ia->ri_memreg_strategy = memreg; |
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
483 | return 0; | 587 | return 0; |
484 | out2: | 588 | out2: |
485 | rdma_destroy_id(ia->ri_id); | 589 | rdma_destroy_id(ia->ri_id); |
590 | ia->ri_id = NULL; | ||
486 | out1: | 591 | out1: |
487 | return rc; | 592 | return rc; |
488 | } | 593 | } |
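The rpcrdma_ia_open() hunk above now validates the requested memory-registration mode against the adapter's capabilities before falling back: memory windows need IB_DEVICE_MEM_WINDOW, FMR needs an alloc_fmr verb, and FRMR needs both the fast-registration extensions and a device-wide DMA lkey. A compact restatement of the FRMR test for reference (the helper name is illustrative, not part of the patch):

	/* FRMR is only usable when the adapter offers fast-registration work
	 * requests and a local DMA lkey. */
	static int frmr_is_supported(const struct ib_device_attr *devattr)
	{
		const int need = IB_DEVICE_MEM_MGT_EXTENSIONS |
				 IB_DEVICE_LOCAL_DMA_LKEY;

		return (devattr->device_cap_flags & need) == need;
	}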
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
503 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 608 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
504 | __func__, rc); | 609 | __func__, rc); |
505 | } | 610 | } |
506 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) | 611 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
507 | rdma_destroy_qp(ia->ri_id); | 612 | if (ia->ri_id->qp) |
613 | rdma_destroy_qp(ia->ri_id); | ||
614 | rdma_destroy_id(ia->ri_id); | ||
615 | ia->ri_id = NULL; | ||
616 | } | ||
508 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 617 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { |
509 | rc = ib_dealloc_pd(ia->ri_pd); | 618 | rc = ib_dealloc_pd(ia->ri_pd); |
510 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 619 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", |
511 | __func__, rc); | 620 | __func__, rc); |
512 | } | 621 | } |
513 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) | ||
514 | rdma_destroy_id(ia->ri_id); | ||
515 | } | 622 | } |
516 | 623 | ||
517 | /* | 624 | /* |
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
541 | ep->rep_attr.srq = NULL; | 648 | ep->rep_attr.srq = NULL; |
542 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 649 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
543 | switch (ia->ri_memreg_strategy) { | 650 | switch (ia->ri_memreg_strategy) { |
651 | case RPCRDMA_FRMR: | ||
652 | /* Add room for frmr register and invalidate WRs */ | ||
653 | ep->rep_attr.cap.max_send_wr *= 3; | ||
654 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
655 | return -EINVAL; | ||
656 | break; | ||
544 | case RPCRDMA_MEMWINDOWS_ASYNC: | 657 | case RPCRDMA_MEMWINDOWS_ASYNC: |
545 | case RPCRDMA_MEMWINDOWS: | 658 | case RPCRDMA_MEMWINDOWS: |
546 | /* Add room for mw_binds+unbinds - overkill! */ | 659 | /* Add room for mw_binds+unbinds - overkill! */ |
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
617 | ep->rep_remote_cma.private_data_len = 0; | 730 | ep->rep_remote_cma.private_data_len = 0; |
618 | 731 | ||
619 | /* Client offers RDMA Read but does not initiate */ | 732 | /* Client offers RDMA Read but does not initiate */ |
620 | switch (ia->ri_memreg_strategy) { | 733 | ep->rep_remote_cma.initiator_depth = 0; |
621 | case RPCRDMA_BOUNCEBUFFERS: | 734 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) |
622 | ep->rep_remote_cma.responder_resources = 0; | 735 | ep->rep_remote_cma.responder_resources = 0; |
623 | break; | 736 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
624 | case RPCRDMA_MTHCAFMR: | 737 | ep->rep_remote_cma.responder_resources = 32; |
625 | case RPCRDMA_REGISTER: | 738 | else |
626 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
627 | (RPCRDMA_MAX_DATA_SEGS / 8); | ||
628 | break; | ||
629 | case RPCRDMA_MEMWINDOWS: | ||
630 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
631 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
632 | case RPCRDMA_ALLPHYSICAL: | ||
633 | #endif | ||
634 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
635 | (RPCRDMA_MAX_DATA_SEGS / 2); | ||
636 | break; | ||
637 | default: | ||
638 | break; | ||
639 | } | ||
640 | if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) | ||
641 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 739 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
642 | ep->rep_remote_cma.initiator_depth = 0; | ||
643 | 740 | ||
644 | ep->rep_remote_cma.retry_count = 7; | 741 | ep->rep_remote_cma.retry_count = 7; |
645 | ep->rep_remote_cma.flow_control = 0; | 742 | ep->rep_remote_cma.flow_control = 0; |
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
679 | if (rc) | 776 | if (rc) |
680 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 777 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
681 | " returned %i\n", __func__, rc); | 778 | " returned %i\n", __func__, rc); |
779 | rdma_destroy_qp(ia->ri_id); | ||
780 | ia->ri_id->qp = NULL; | ||
682 | } | 781 | } |
683 | 782 | ||
684 | ep->rep_func = NULL; | ||
685 | |||
686 | /* padding - could be done in rpcrdma_buffer_destroy... */ | 783 | /* padding - could be done in rpcrdma_buffer_destroy... */ |
687 | if (ep->rep_pad_mr) { | 784 | if (ep->rep_pad_mr) { |
688 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); | 785 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); |
689 | ep->rep_pad_mr = NULL; | 786 | ep->rep_pad_mr = NULL; |
690 | } | 787 | } |
691 | 788 | ||
692 | if (ia->ri_id->qp) { | ||
693 | rdma_destroy_qp(ia->ri_id); | ||
694 | ia->ri_id->qp = NULL; | ||
695 | } | ||
696 | |||
697 | rpcrdma_clean_cq(ep->rep_cq); | 789 | rpcrdma_clean_cq(ep->rep_cq); |
698 | rc = ib_destroy_cq(ep->rep_cq); | 790 | rc = ib_destroy_cq(ep->rep_cq); |
699 | if (rc) | 791 | if (rc) |
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
712 | struct rdma_cm_id *id; | 804 | struct rdma_cm_id *id; |
713 | int rc = 0; | 805 | int rc = 0; |
714 | int retry_count = 0; | 806 | int retry_count = 0; |
715 | int reconnect = (ep->rep_connected != 0); | ||
716 | 807 | ||
717 | if (reconnect) { | 808 | if (ep->rep_connected != 0) { |
718 | struct rpcrdma_xprt *xprt; | 809 | struct rpcrdma_xprt *xprt; |
719 | retry: | 810 | retry: |
720 | rc = rpcrdma_ep_disconnect(ep, ia); | 811 | rc = rpcrdma_ep_disconnect(ep, ia); |
@@ -745,6 +836,7 @@ retry: | |||
745 | goto out; | 836 | goto out; |
746 | } | 837 | } |
747 | /* END TEMP */ | 838 | /* END TEMP */ |
839 | rdma_destroy_qp(ia->ri_id); | ||
748 | rdma_destroy_id(ia->ri_id); | 840 | rdma_destroy_id(ia->ri_id); |
749 | ia->ri_id = id; | 841 | ia->ri_id = id; |
750 | } | 842 | } |
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
769 | } | 861 | } |
770 | } | 862 | } |
771 | 863 | ||
772 | /* Theoretically a client initiator_depth > 0 is not needed, | ||
773 | * but many peers fail to complete the connection unless they | ||
774 | * == responder_resources! */ | ||
775 | if (ep->rep_remote_cma.initiator_depth != | ||
776 | ep->rep_remote_cma.responder_resources) | ||
777 | ep->rep_remote_cma.initiator_depth = | ||
778 | ep->rep_remote_cma.responder_resources; | ||
779 | |||
780 | ep->rep_connected = 0; | 864 | ep->rep_connected = 0; |
781 | 865 | ||
782 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 866 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
786 | goto out; | 870 | goto out; |
787 | } | 871 | } |
788 | 872 | ||
789 | if (reconnect) | ||
790 | return 0; | ||
791 | |||
792 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 873 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
793 | 874 | ||
794 | /* | 875 | /* |
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
805 | if (ep->rep_connected <= 0) { | 886 | if (ep->rep_connected <= 0) { |
806 | /* Sometimes, the only way to reliably connect to remote | 887 | /* Sometimes, the only way to reliably connect to remote |
807 | * CMs is to use same nonzero values for ORD and IRD. */ | 888 | * CMs is to use same nonzero values for ORD and IRD. */ |
808 | ep->rep_remote_cma.initiator_depth = | 889 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && |
809 | ep->rep_remote_cma.responder_resources; | 890 | (ep->rep_remote_cma.responder_resources == 0 || |
810 | if (ep->rep_remote_cma.initiator_depth == 0) | 891 | ep->rep_remote_cma.initiator_depth != |
811 | ++ep->rep_remote_cma.initiator_depth; | 892 | ep->rep_remote_cma.responder_resources)) { |
812 | if (ep->rep_remote_cma.responder_resources == 0) | 893 | if (ep->rep_remote_cma.responder_resources == 0) |
813 | ++ep->rep_remote_cma.responder_resources; | 894 | ep->rep_remote_cma.responder_resources = 1; |
814 | if (retry_count++ == 0) | 895 | ep->rep_remote_cma.initiator_depth = |
896 | ep->rep_remote_cma.responder_resources; | ||
815 | goto retry; | 897 | goto retry; |
898 | } | ||
816 | rc = ep->rep_connected; | 899 | rc = ep->rep_connected; |
817 | } else { | 900 | } else { |
818 | dprintk("RPC: %s: connected\n", __func__); | 901 | dprintk("RPC: %s: connected\n", __func__); |
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
863 | char *p; | 946 | char *p; |
864 | size_t len; | 947 | size_t len; |
865 | int i, rc; | 948 | int i, rc; |
949 | struct rpcrdma_mw *r; | ||
866 | 950 | ||
867 | buf->rb_max_requests = cdata->max_requests; | 951 | buf->rb_max_requests = cdata->max_requests; |
868 | spin_lock_init(&buf->rb_lock); | 952 | spin_lock_init(&buf->rb_lock); |
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
873 | * 2. arrays of struct rpcrdma_req to fill in pointers | 957 | * 2. arrays of struct rpcrdma_req to fill in pointers |
874 | * 3. array of struct rpcrdma_rep for replies | 958 | * 3. array of struct rpcrdma_rep for replies |
875 | * 4. padding, if any | 959 | * 4. padding, if any |
876 | * 5. mw's, if any | 960 | * 5. mw's, fmr's or frmr's, if any |
877 | * Send/recv buffers in req/rep need to be registered | 961 | * Send/recv buffers in req/rep need to be registered |
878 | */ | 962 | */ |
879 | 963 | ||
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
881 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 965 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
882 | len += cdata->padding; | 966 | len += cdata->padding; |
883 | switch (ia->ri_memreg_strategy) { | 967 | switch (ia->ri_memreg_strategy) { |
968 | case RPCRDMA_FRMR: | ||
969 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
970 | sizeof(struct rpcrdma_mw); | ||
971 | break; | ||
884 | case RPCRDMA_MTHCAFMR: | 972 | case RPCRDMA_MTHCAFMR: |
885 | /* TBD we are perhaps overallocating here */ | 973 | /* TBD we are perhaps overallocating here */ |
886 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 974 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
927 | * and also reduce unbind-to-bind collision. | 1015 | * and also reduce unbind-to-bind collision. |
928 | */ | 1016 | */ |
929 | INIT_LIST_HEAD(&buf->rb_mws); | 1017 | INIT_LIST_HEAD(&buf->rb_mws); |
1018 | r = (struct rpcrdma_mw *)p; | ||
930 | switch (ia->ri_memreg_strategy) { | 1019 | switch (ia->ri_memreg_strategy) { |
1020 | case RPCRDMA_FRMR: | ||
1021 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
1022 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1023 | RPCRDMA_MAX_SEGS); | ||
1024 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1025 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1026 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1027 | " failed %i\n", __func__, rc); | ||
1028 | goto out; | ||
1029 | } | ||
1030 | r->r.frmr.fr_pgl = | ||
1031 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1032 | RPCRDMA_MAX_SEGS); | ||
1033 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1034 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1035 | dprintk("RPC: %s: " | ||
1036 | "ib_alloc_fast_reg_page_list " | ||
1037 | "failed %i\n", __func__, rc); | ||
1038 | goto out; | ||
1039 | } | ||
1040 | list_add(&r->mw_list, &buf->rb_mws); | ||
1041 | ++r; | ||
1042 | } | ||
1043 | break; | ||
931 | case RPCRDMA_MTHCAFMR: | 1044 | case RPCRDMA_MTHCAFMR: |
932 | { | ||
933 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
934 | struct ib_fmr_attr fa = { | ||
935 | RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT | ||
936 | }; | ||
937 | /* TBD we are perhaps overallocating here */ | 1045 | /* TBD we are perhaps overallocating here */ |
938 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1046 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
1047 | static struct ib_fmr_attr fa = | ||
1048 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
939 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | 1049 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, |
940 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | 1050 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, |
941 | &fa); | 1051 | &fa); |
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
948 | list_add(&r->mw_list, &buf->rb_mws); | 1058 | list_add(&r->mw_list, &buf->rb_mws); |
949 | ++r; | 1059 | ++r; |
950 | } | 1060 | } |
951 | } | ||
952 | break; | 1061 | break; |
953 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1062 | case RPCRDMA_MEMWINDOWS_ASYNC: |
954 | case RPCRDMA_MEMWINDOWS: | 1063 | case RPCRDMA_MEMWINDOWS: |
955 | { | ||
956 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
957 | /* Allocate one extra request's worth, for full cycling */ | 1064 | /* Allocate one extra request's worth, for full cycling */ |
958 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1065 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
959 | r->r.mw = ib_alloc_mw(ia->ri_pd); | 1066 | r->r.mw = ib_alloc_mw(ia->ri_pd); |
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
966 | list_add(&r->mw_list, &buf->rb_mws); | 1073 | list_add(&r->mw_list, &buf->rb_mws); |
967 | ++r; | 1074 | ++r; |
968 | } | 1075 | } |
969 | } | ||
970 | break; | 1076 | break; |
971 | default: | 1077 | default: |
972 | break; | 1078 | break; |
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1046 | { | 1152 | { |
1047 | int rc, i; | 1153 | int rc, i; |
1048 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1154 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1155 | struct rpcrdma_mw *r; | ||
1049 | 1156 | ||
1050 | /* clean up in reverse order from create | 1157 | /* clean up in reverse order from create |
1051 | * 1. recv mr memory (mr free, then kfree) | 1158 | * 1. recv mr memory (mr free, then kfree) |
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1065 | } | 1172 | } |
1066 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1173 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
1067 | while (!list_empty(&buf->rb_mws)) { | 1174 | while (!list_empty(&buf->rb_mws)) { |
1068 | struct rpcrdma_mw *r; | ||
1069 | r = list_entry(buf->rb_mws.next, | 1175 | r = list_entry(buf->rb_mws.next, |
1070 | struct rpcrdma_mw, mw_list); | 1176 | struct rpcrdma_mw, mw_list); |
1071 | list_del(&r->mw_list); | 1177 | list_del(&r->mw_list); |
1072 | switch (ia->ri_memreg_strategy) { | 1178 | switch (ia->ri_memreg_strategy) { |
1179 | case RPCRDMA_FRMR: | ||
1180 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1181 | if (rc) | ||
1182 | dprintk("RPC: %s:" | ||
1183 | " ib_dereg_mr" | ||
1184 | " failed %i\n", | ||
1185 | __func__, rc); | ||
1186 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1187 | break; | ||
1073 | case RPCRDMA_MTHCAFMR: | 1188 | case RPCRDMA_MTHCAFMR: |
1074 | rc = ib_dealloc_fmr(r->r.fmr); | 1189 | rc = ib_dealloc_fmr(r->r.fmr); |
1075 | if (rc) | 1190 | if (rc) |
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1115 | { | 1230 | { |
1116 | struct rpcrdma_req *req; | 1231 | struct rpcrdma_req *req; |
1117 | unsigned long flags; | 1232 | unsigned long flags; |
1233 | int i; | ||
1234 | struct rpcrdma_mw *r; | ||
1118 | 1235 | ||
1119 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1236 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1120 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1237 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1135 | } | 1252 | } |
1136 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1253 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
1137 | if (!list_empty(&buffers->rb_mws)) { | 1254 | if (!list_empty(&buffers->rb_mws)) { |
1138 | int i = RPCRDMA_MAX_SEGS - 1; | 1255 | i = RPCRDMA_MAX_SEGS - 1; |
1139 | do { | 1256 | do { |
1140 | struct rpcrdma_mw *r; | ||
1141 | r = list_entry(buffers->rb_mws.next, | 1257 | r = list_entry(buffers->rb_mws.next, |
1142 | struct rpcrdma_mw, mw_list); | 1258 | struct rpcrdma_mw, mw_list); |
1143 | list_del(&r->mw_list); | 1259 | list_del(&r->mw_list); |
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1171 | req->rl_reply = NULL; | 1287 | req->rl_reply = NULL; |
1172 | } | 1288 | } |
1173 | switch (ia->ri_memreg_strategy) { | 1289 | switch (ia->ri_memreg_strategy) { |
1290 | case RPCRDMA_FRMR: | ||
1174 | case RPCRDMA_MTHCAFMR: | 1291 | case RPCRDMA_MTHCAFMR: |
1175 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1292 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1176 | case RPCRDMA_MEMWINDOWS: | 1293 | case RPCRDMA_MEMWINDOWS: |
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
1252 | va, len, DMA_BIDIRECTIONAL); | 1369 | va, len, DMA_BIDIRECTIONAL); |
1253 | iov->length = len; | 1370 | iov->length = len; |
1254 | 1371 | ||
1255 | if (ia->ri_bind_mem != NULL) { | 1372 | if (ia->ri_have_dma_lkey) { |
1373 | *mrp = NULL; | ||
1374 | iov->lkey = ia->ri_dma_lkey; | ||
1375 | return 0; | ||
1376 | } else if (ia->ri_bind_mem != NULL) { | ||
1256 | *mrp = NULL; | 1377 | *mrp = NULL; |
1257 | iov->lkey = ia->ri_bind_mem->lkey; | 1378 | iov->lkey = ia->ri_bind_mem->lkey; |
1258 | return 0; | 1379 | return 0; |
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
1329 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | 1450 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); |
1330 | } | 1451 | } |
1331 | 1452 | ||
1453 | static int | ||
1454 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1455 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1456 | struct rpcrdma_xprt *r_xprt) | ||
1457 | { | ||
1458 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1459 | struct ib_send_wr frmr_wr, *bad_wr; | ||
1460 | u8 key; | ||
1461 | int len, pageoff; | ||
1462 | int i, rc; | ||
1463 | |||
1464 | pageoff = offset_in_page(seg1->mr_offset); | ||
1465 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1466 | seg1->mr_len += pageoff; | ||
1467 | len = -pageoff; | ||
1468 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1469 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1470 | for (i = 0; i < *nsegs;) { | ||
1471 | rpcrdma_map_one(ia, seg, writing); | ||
1472 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | ||
1473 | len += seg->mr_len; | ||
1474 | ++seg; | ||
1475 | ++i; | ||
1476 | /* Check for holes */ | ||
1477 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1478 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1479 | break; | ||
1480 | } | ||
1481 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1482 | __func__, seg1->mr_chunk.rl_mw, i); | ||
1483 | |||
1484 | /* Bump the key */ | ||
1485 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1486 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1487 | |||
1488 | /* Prepare FRMR WR */ | ||
1489 | memset(&frmr_wr, 0, sizeof frmr_wr); | ||
1490 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | ||
1491 | frmr_wr.send_flags = 0; /* unsignaled */ | ||
1492 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | ||
1493 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | ||
1494 | frmr_wr.wr.fast_reg.page_list_len = i; | ||
1495 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1496 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | ||
1497 | frmr_wr.wr.fast_reg.access_flags = (writing ? | ||
1498 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | ||
1499 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1500 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1501 | |||
1502 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | ||
1503 | |||
1504 | if (rc) { | ||
1505 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1506 | " status %i\n", __func__, rc); | ||
1507 | while (i--) | ||
1508 | rpcrdma_unmap_one(ia, --seg); | ||
1509 | } else { | ||
1510 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1511 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1512 | seg1->mr_nsegs = i; | ||
1513 | seg1->mr_len = len; | ||
1514 | } | ||
1515 | *nsegs = i; | ||
1516 | return rc; | ||
1517 | } | ||
1518 | |||
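Note on the FRMR registration path above: rpcrdma_register_frmr_external() DMA-maps up to RPCRDMA_MAX_DATA_SEGS segments into the MW's fast-reg page list, rotates the low byte of the rkey so a stale key cannot be replayed, and posts an unsignaled IB_WR_FAST_REG_MR work request on the connected QP. Reduced to its essentials, and assuming the fast-reg MR and page list were allocated up front (the matching teardown is visible in the rpcrdma_buffer_destroy hunk at the top of this file's diff), the work-request construction looks roughly like the sketch below. post_fast_reg, npages, iova and writing are illustrative names, not part of the patch.

    static int post_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
                             struct ib_fast_reg_page_list *pgl,
                             int npages, u64 iova, int writing)
    {
            struct ib_send_wr wr, *bad_wr;
            u8 key = (u8)(mr->rkey & 0xFF);

            /* rotate the low byte of the rkey before re-registering */
            ib_update_fast_reg_key(mr, ++key);

            memset(&wr, 0, sizeof wr);              /* send_flags stays 0: unsignaled */
            wr.opcode = IB_WR_FAST_REG_MR;
            wr.wr.fast_reg.iova_start = iova;
            wr.wr.fast_reg.page_list = pgl;         /* pgl->page_list[] already filled */
            wr.wr.fast_reg.page_list_len = npages;
            wr.wr.fast_reg.page_shift = PAGE_SHIFT;
            wr.wr.fast_reg.length = npages << PAGE_SHIFT;
            wr.wr.fast_reg.access_flags = writing ?
                    IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ;
            wr.wr.fast_reg.rkey = mr->rkey;

            return ib_post_send(qp, &wr, &bad_wr);
    }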
1519 | static int | ||
1520 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1521 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1522 | { | ||
1523 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1524 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1525 | int rc; | ||
1526 | |||
1527 | while (seg1->mr_nsegs--) | ||
1528 | rpcrdma_unmap_one(ia, seg++); | ||
1529 | |||
1530 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1531 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1532 | invalidate_wr.send_flags = 0; /* unsignaled */ | ||
1533 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1534 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1535 | |||
1536 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1537 | if (rc) | ||
1538 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
1539 | " status %i\n", __func__, rc); | ||
1540 | return rc; | ||
1541 | } | ||
1542 | |||
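The matching invalidation, rpcrdma_deregister_frmr_external(), first DMA-unmaps each segment and then posts an unsignaled IB_WR_LOCAL_INV carrying the MR's current rkey; like the fast-register WR it is charged against the send completion budget via DECR_CQCOUNT(). A minimal counterpart to the sketch above (post_local_inv is an illustrative name):

    static int post_local_inv(struct ib_qp *qp, struct ib_mr *mr)
    {
            struct ib_send_wr wr, *bad_wr;

            memset(&wr, 0, sizeof wr);
            wr.opcode = IB_WR_LOCAL_INV;            /* revoke remote access */
            wr.ex.invalidate_rkey = mr->rkey;       /* key granted by the fast-reg WR */

            return ib_post_send(qp, &wr, &bad_wr);  /* unsignaled */
    }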
1543 | static int | ||
1544 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1545 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1546 | { | ||
1547 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1548 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1549 | int len, pageoff, i, rc; | ||
1550 | |||
1551 | pageoff = offset_in_page(seg1->mr_offset); | ||
1552 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1553 | seg1->mr_len += pageoff; | ||
1554 | len = -pageoff; | ||
1555 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1556 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1557 | for (i = 0; i < *nsegs;) { | ||
1558 | rpcrdma_map_one(ia, seg, writing); | ||
1559 | physaddrs[i] = seg->mr_dma; | ||
1560 | len += seg->mr_len; | ||
1561 | ++seg; | ||
1562 | ++i; | ||
1563 | /* Check for holes */ | ||
1564 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1565 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1566 | break; | ||
1567 | } | ||
1568 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1569 | physaddrs, i, seg1->mr_dma); | ||
1570 | if (rc) { | ||
1571 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1572 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1573 | len, (unsigned long long)seg1->mr_dma, | ||
1574 | pageoff, i, rc); | ||
1575 | while (i--) | ||
1576 | rpcrdma_unmap_one(ia, --seg); | ||
1577 | } else { | ||
1578 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1579 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1580 | seg1->mr_nsegs = i; | ||
1581 | seg1->mr_len = len; | ||
1582 | } | ||
1583 | *nsegs = i; | ||
1584 | return rc; | ||
1585 | } | ||
1586 | |||
1587 | static int | ||
1588 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1589 | struct rpcrdma_ia *ia) | ||
1590 | { | ||
1591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1592 | LIST_HEAD(l); | ||
1593 | int rc; | ||
1594 | |||
1595 | list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1596 | rc = ib_unmap_fmr(&l); | ||
1597 | while (seg1->mr_nsegs--) | ||
1598 | rpcrdma_unmap_one(ia, seg++); | ||
1599 | if (rc) | ||
1600 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1601 | " status %i\n", __func__, rc); | ||
1602 | return rc; | ||
1603 | } | ||
1604 | |||
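The two FMR helpers above are the former RPCRDMA_MTHCAFMR switch bodies hoisted out of rpcrdma_register_external()/rpcrdma_deregister_external() essentially unchanged: registration hands a page-aligned array of DMA addresses to ib_map_phys_fmr(), and teardown passes the FMR through ib_unmap_fmr() on a one-entry list. A compact sketch of that verb pairing, assuming fmr was allocated earlier with ib_alloc_fmr() against the transport's PD (map_then_unmap, pages and npages are illustrative names):

    static int map_then_unmap(struct ib_fmr *fmr, u64 *pages, int npages)
    {
            LIST_HEAD(l);
            int rc;

            /* iova is the first page's DMA address, mirroring seg1->mr_dma above */
            rc = ib_map_phys_fmr(fmr, pages, npages, pages[0]);
            if (rc)
                    return rc;

            list_add(&fmr->list, &l);
            return ib_unmap_fmr(&l);        /* tears the mapping back down */
    }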
1605 | static int | ||
1606 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1607 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1608 | struct rpcrdma_xprt *r_xprt) | ||
1609 | { | ||
1610 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1611 | IB_ACCESS_REMOTE_READ); | ||
1612 | struct ib_mw_bind param; | ||
1613 | int rc; | ||
1614 | |||
1615 | *nsegs = 1; | ||
1616 | rpcrdma_map_one(ia, seg, writing); | ||
1617 | param.mr = ia->ri_bind_mem; | ||
1618 | param.wr_id = 0ULL; /* no send cookie */ | ||
1619 | param.addr = seg->mr_dma; | ||
1620 | param.length = seg->mr_len; | ||
1621 | param.send_flags = 0; | ||
1622 | param.mw_access_flags = mem_priv; | ||
1623 | |||
1624 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1625 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1626 | if (rc) { | ||
1627 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1628 | "%u@0x%llx status %i\n", | ||
1629 | __func__, seg->mr_len, | ||
1630 | (unsigned long long)seg->mr_dma, rc); | ||
1631 | rpcrdma_unmap_one(ia, seg); | ||
1632 | } else { | ||
1633 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1634 | seg->mr_base = param.addr; | ||
1635 | seg->mr_nsegs = 1; | ||
1636 | } | ||
1637 | return rc; | ||
1638 | } | ||
1639 | |||
1640 | static int | ||
1641 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1642 | struct rpcrdma_ia *ia, | ||
1643 | struct rpcrdma_xprt *r_xprt, void **r) | ||
1644 | { | ||
1645 | struct ib_mw_bind param; | ||
1646 | LIST_HEAD(l); | ||
1647 | int rc; | ||
1648 | |||
1649 | BUG_ON(seg->mr_nsegs != 1); | ||
1650 | param.mr = ia->ri_bind_mem; | ||
1651 | param.addr = 0ULL; /* unbind */ | ||
1652 | param.length = 0; | ||
1653 | param.mw_access_flags = 0; | ||
1654 | if (*r) { | ||
1655 | param.wr_id = (u64) (unsigned long) *r; | ||
1656 | param.send_flags = IB_SEND_SIGNALED; | ||
1657 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1658 | } else { | ||
1659 | param.wr_id = 0ULL; | ||
1660 | param.send_flags = 0; | ||
1661 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1662 | } | ||
1663 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1664 | rpcrdma_unmap_one(ia, seg); | ||
1665 | if (rc) | ||
1666 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1667 | " status %i\n", __func__, rc); | ||
1668 | else | ||
1669 | *r = NULL; /* will upcall on completion */ | ||
1670 | return rc; | ||
1671 | } | ||
1672 | |||
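The memory-window helpers are likewise the old switch bodies moved into functions: registration binds a single DMA-mapped segment to the preallocated window with ib_bind_mw(), and deregistration is a zero-length bind, signaled only when a reply cookie (*r) is passed in so the completion upcall can release the reply buffer. The unbind step in isolation, as a sketch (unbind_window is an illustrative name; only the unsignaled variant is shown):

    static int unbind_window(struct ib_qp *qp, struct ib_mw *mw,
                             struct ib_mr *bind_mem)
    {
            struct ib_mw_bind param;

            /* zero address and zero length release the binding;
             * send_flags left 0 gives the unsignaled variant */
            memset(&param, 0, sizeof param);
            param.mr = bind_mem;

            return ib_bind_mw(qp, mw, &param);
    }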
1673 | static int | ||
1674 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
1675 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1676 | { | ||
1677 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1678 | IB_ACCESS_REMOTE_READ); | ||
1679 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1680 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1681 | int len, i, rc = 0; | ||
1682 | |||
1683 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1684 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1685 | for (len = 0, i = 0; i < *nsegs;) { | ||
1686 | rpcrdma_map_one(ia, seg, writing); | ||
1687 | ipb[i].addr = seg->mr_dma; | ||
1688 | ipb[i].size = seg->mr_len; | ||
1689 | len += seg->mr_len; | ||
1690 | ++seg; | ||
1691 | ++i; | ||
1692 | /* Check for holes */ | ||
1693 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1694 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1695 | break; | ||
1696 | } | ||
1697 | seg1->mr_base = seg1->mr_dma; | ||
1698 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1699 | ipb, i, mem_priv, &seg1->mr_base); | ||
1700 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1701 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1702 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1703 | "%u@0x%llx (%d)... status %i\n", | ||
1704 | __func__, len, | ||
1705 | (unsigned long long)seg1->mr_dma, i, rc); | ||
1706 | while (i--) | ||
1707 | rpcrdma_unmap_one(ia, --seg); | ||
1708 | } else { | ||
1709 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1710 | seg1->mr_nsegs = i; | ||
1711 | seg1->mr_len = len; | ||
1712 | } | ||
1713 | *nsegs = i; | ||
1714 | return rc; | ||
1715 | } | ||
1716 | |||
1717 | static int | ||
1718 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
1719 | struct rpcrdma_ia *ia) | ||
1720 | { | ||
1721 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1722 | int rc; | ||
1723 | |||
1724 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
1725 | seg1->mr_chunk.rl_mr = NULL; | ||
1726 | while (seg1->mr_nsegs--) | ||
1727 | rpcrdma_unmap_one(ia, seg++); | ||
1728 | if (rc) | ||
1729 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1730 | " status %i\n", __func__, rc); | ||
1731 | return rc; | ||
1732 | } | ||
1733 | |||
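The default helpers keep the fallback behaviour of registering the physical segment list on every call: ib_reg_phys_mr() returns an MR (rewriting mr_base to the iova it chose) and ib_dereg_mr() releases it at deregister time. The verb pairing on its own, as a sketch (reg_then_dereg is an illustrative name; the real code of course holds the MR for the lifetime of the chunk):

    static int reg_then_dereg(struct ib_pd *pd, struct ib_phys_buf *ipb,
                              int n, int access, u64 *iova)
    {
            struct ib_mr *mr = ib_reg_phys_mr(pd, ipb, n, access, iova);

            if (IS_ERR(mr))
                    return PTR_ERR(mr);
            return ib_dereg_mr(mr);
    }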
1332 | int | 1734 | int |
1333 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1735 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
1334 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1736 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
1335 | { | 1737 | { |
1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1738 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1337 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1338 | IB_ACCESS_REMOTE_READ); | ||
1339 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1340 | int i; | ||
1341 | int rc = 0; | 1739 | int rc = 0; |
1342 | 1740 | ||
1343 | switch (ia->ri_memreg_strategy) { | 1741 | switch (ia->ri_memreg_strategy) { |
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1352 | break; | 1750 | break; |
1353 | #endif | 1751 | #endif |
1354 | 1752 | ||
1355 | /* Registration using fast memory registration */ | 1753 | /* Registration using FRMR (fast registration memory regions) */ |
1754 | case RPCRDMA_FRMR: | ||
1755 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
1756 | break; | ||
1757 | |||
1758 | /* Registration using FMR (fast memory regions) */ | ||
1356 | case RPCRDMA_MTHCAFMR: | 1759 | case RPCRDMA_MTHCAFMR: |
1357 | { | 1760 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
1358 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1359 | int len, pageoff = offset_in_page(seg->mr_offset); | ||
1360 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1361 | seg1->mr_len += pageoff; | ||
1362 | len = -pageoff; | ||
1363 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1364 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1365 | for (i = 0; i < nsegs;) { | ||
1366 | rpcrdma_map_one(ia, seg, writing); | ||
1367 | physaddrs[i] = seg->mr_dma; | ||
1368 | len += seg->mr_len; | ||
1369 | ++seg; | ||
1370 | ++i; | ||
1371 | /* Check for holes */ | ||
1372 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1373 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1374 | break; | ||
1375 | } | ||
1376 | nsegs = i; | ||
1377 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1378 | physaddrs, nsegs, seg1->mr_dma); | ||
1379 | if (rc) { | ||
1380 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1381 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1382 | len, (unsigned long long)seg1->mr_dma, | ||
1383 | pageoff, nsegs, rc); | ||
1384 | while (nsegs--) | ||
1385 | rpcrdma_unmap_one(ia, --seg); | ||
1386 | } else { | ||
1387 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1388 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1389 | seg1->mr_nsegs = nsegs; | ||
1390 | seg1->mr_len = len; | ||
1391 | } | ||
1392 | } | ||
1393 | break; | 1761 | break; |
1394 | 1762 | ||
1395 | /* Registration using memory windows */ | 1763 | /* Registration using memory windows */ |
1396 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1764 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1397 | case RPCRDMA_MEMWINDOWS: | 1765 | case RPCRDMA_MEMWINDOWS: |
1398 | { | 1766 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); |
1399 | struct ib_mw_bind param; | ||
1400 | rpcrdma_map_one(ia, seg, writing); | ||
1401 | param.mr = ia->ri_bind_mem; | ||
1402 | param.wr_id = 0ULL; /* no send cookie */ | ||
1403 | param.addr = seg->mr_dma; | ||
1404 | param.length = seg->mr_len; | ||
1405 | param.send_flags = 0; | ||
1406 | param.mw_access_flags = mem_priv; | ||
1407 | |||
1408 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1409 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1410 | seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1411 | if (rc) { | ||
1412 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1413 | "%u@0x%llx status %i\n", | ||
1414 | __func__, seg->mr_len, | ||
1415 | (unsigned long long)seg->mr_dma, rc); | ||
1416 | rpcrdma_unmap_one(ia, seg); | ||
1417 | } else { | ||
1418 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1419 | seg->mr_base = param.addr; | ||
1420 | seg->mr_nsegs = 1; | ||
1421 | nsegs = 1; | ||
1422 | } | ||
1423 | } | ||
1424 | break; | 1767 | break; |
1425 | 1768 | ||
1426 | /* Default registration each time */ | 1769 | /* Default registration each time */ |
1427 | default: | 1770 | default: |
1428 | { | 1771 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); |
1429 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1430 | int len = 0; | ||
1431 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1432 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1433 | for (i = 0; i < nsegs;) { | ||
1434 | rpcrdma_map_one(ia, seg, writing); | ||
1435 | ipb[i].addr = seg->mr_dma; | ||
1436 | ipb[i].size = seg->mr_len; | ||
1437 | len += seg->mr_len; | ||
1438 | ++seg; | ||
1439 | ++i; | ||
1440 | /* Check for holes */ | ||
1441 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1442 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1443 | break; | ||
1444 | } | ||
1445 | nsegs = i; | ||
1446 | seg1->mr_base = seg1->mr_dma; | ||
1447 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1448 | ipb, nsegs, mem_priv, &seg1->mr_base); | ||
1449 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1450 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1451 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1452 | "%u@0x%llx (%d)... status %i\n", | ||
1453 | __func__, len, | ||
1454 | (unsigned long long)seg1->mr_dma, nsegs, rc); | ||
1455 | while (nsegs--) | ||
1456 | rpcrdma_unmap_one(ia, --seg); | ||
1457 | } else { | ||
1458 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1459 | seg1->mr_nsegs = nsegs; | ||
1460 | seg1->mr_len = len; | ||
1461 | } | ||
1462 | } | ||
1463 | break; | 1772 | break; |
1464 | } | 1773 | } |
1465 | if (rc) | 1774 | if (rc) |
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1473 | struct rpcrdma_xprt *r_xprt, void *r) | 1782 | struct rpcrdma_xprt *r_xprt, void *r) |
1474 | { | 1783 | { |
1475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1784 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1476 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1477 | int nsegs = seg->mr_nsegs, rc; | 1785 | int nsegs = seg->mr_nsegs, rc; |
1478 | 1786 | ||
1479 | switch (ia->ri_memreg_strategy) { | 1787 | switch (ia->ri_memreg_strategy) { |
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1486 | break; | 1794 | break; |
1487 | #endif | 1795 | #endif |
1488 | 1796 | ||
1797 | case RPCRDMA_FRMR: | ||
1798 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
1799 | break; | ||
1800 | |||
1489 | case RPCRDMA_MTHCAFMR: | 1801 | case RPCRDMA_MTHCAFMR: |
1490 | { | 1802 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1491 | LIST_HEAD(l); | ||
1492 | list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1493 | rc = ib_unmap_fmr(&l); | ||
1494 | while (seg1->mr_nsegs--) | ||
1495 | rpcrdma_unmap_one(ia, seg++); | ||
1496 | } | ||
1497 | if (rc) | ||
1498 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1499 | " status %i\n", __func__, rc); | ||
1500 | break; | 1803 | break; |
1501 | 1804 | ||
1502 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1805 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1503 | case RPCRDMA_MEMWINDOWS: | 1806 | case RPCRDMA_MEMWINDOWS: |
1504 | { | 1807 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); |
1505 | struct ib_mw_bind param; | ||
1506 | BUG_ON(nsegs != 1); | ||
1507 | param.mr = ia->ri_bind_mem; | ||
1508 | param.addr = 0ULL; /* unbind */ | ||
1509 | param.length = 0; | ||
1510 | param.mw_access_flags = 0; | ||
1511 | if (r) { | ||
1512 | param.wr_id = (u64) (unsigned long) r; | ||
1513 | param.send_flags = IB_SEND_SIGNALED; | ||
1514 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1515 | } else { | ||
1516 | param.wr_id = 0ULL; | ||
1517 | param.send_flags = 0; | ||
1518 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1519 | } | ||
1520 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1521 | seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1522 | rpcrdma_unmap_one(ia, seg); | ||
1523 | } | ||
1524 | if (rc) | ||
1525 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1526 | " status %i\n", __func__, rc); | ||
1527 | else | ||
1528 | r = NULL; /* will upcall on completion */ | ||
1529 | break; | 1808 | break; |
1530 | 1809 | ||
1531 | default: | 1810 | default: |
1532 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | 1811 | rc = rpcrdma_deregister_default_external(seg, ia); |
1533 | seg1->mr_chunk.rl_mr = NULL; | ||
1534 | while (seg1->mr_nsegs--) | ||
1535 | rpcrdma_unmap_one(ia, seg++); | ||
1536 | if (rc) | ||
1537 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1538 | " status %i\n", __func__, rc); | ||
1539 | break; | 1812 | break; |
1540 | } | 1813 | } |
1541 | if (r) { | 1814 | if (r) { |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -51,6 +51,9 @@ | |||
51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | 51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ |
52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | 52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ |
53 | 53 | ||
54 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ | ||
55 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
56 | |||
54 | /* | 57 | /* |
55 | * Interface Adapter -- one per transport instance | 58 | * Interface Adapter -- one per transport instance |
56 | */ | 59 | */ |
@@ -58,6 +61,8 @@ struct rpcrdma_ia { | |||
58 | struct rdma_cm_id *ri_id; | 61 | struct rdma_cm_id *ri_id; |
59 | struct ib_pd *ri_pd; | 62 | struct ib_pd *ri_pd; |
60 | struct ib_mr *ri_bind_mem; | 63 | struct ib_mr *ri_bind_mem; |
64 | u32 ri_dma_lkey; | ||
65 | int ri_have_dma_lkey; | ||
61 | struct completion ri_done; | 66 | struct completion ri_done; |
62 | int ri_async_rc; | 67 | int ri_async_rc; |
63 | enum rpcrdma_memreg ri_memreg_strategy; | 68 | enum rpcrdma_memreg ri_memreg_strategy; |
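ri_dma_lkey and ri_have_dma_lkey back the new fast path in rpcrdma_register_internal() shown earlier in this diff: when the adapter supplies a global DMA lkey, internally allocated buffers need no all-physical-memory MR at all. The setup of these two fields is not part of this excerpt; presumably it happens at IA open time roughly as sketched below, keyed off the IB_DEVICE_LOCAL_DMA_LKEY capability (devattr here would come from ib_query_device() on the connected device).

    if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
            ia->ri_have_dma_lkey = 1;
            ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
    }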
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
156 | union { | 161 | union { |
157 | struct ib_mw *mw; | 162 | struct ib_mw *mw; |
158 | struct ib_fmr *fmr; | 163 | struct ib_fmr *fmr; |
164 | struct { | ||
165 | struct ib_fast_reg_page_list *fr_pgl; | ||
166 | struct ib_mr *fr_mr; | ||
167 | } frmr; | ||
159 | } r; | 168 | } r; |
160 | struct list_head mw_list; | 169 | struct list_head mw_list; |
161 | } *rl_mw; | 170 | } *rl_mw; |
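The new frmr arm of the per-MW union carries a fast-reg MR and its page list. Only the teardown is visible in this excerpt (ib_dereg_mr() plus ib_free_fast_reg_page_list() in the rpcrdma_buffer_destroy hunk near the top); the allocation side presumably pairs the corresponding verbs roughly as sketched below, with RPCRDMA_MAX_DATA_SEGS bounding the page list to match the registration loop in verbs.c.

    r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
                                            RPCRDMA_MAX_DATA_SEGS);
    if (IS_ERR(r->r.frmr.fr_mr))
            return PTR_ERR(r->r.frmr.fr_mr);

    r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
                                            RPCRDMA_MAX_DATA_SEGS);
    if (IS_ERR(r->r.frmr.fr_pgl)) {
            ib_dereg_mr(r->r.frmr.fr_mr);
            return PTR_ERR(r->r.frmr.fr_pgl);
    }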
@@ -175,6 +184,7 @@ struct rpcrdma_req { | |||
175 | size_t rl_size; /* actual length of buffer */ | 184 | size_t rl_size; /* actual length of buffer */ |
176 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 185 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
177 | unsigned int rl_nchunks; /* non-zero if chunks */ | 186 | unsigned int rl_nchunks; /* non-zero if chunks */ |
187 | unsigned int rl_connect_cookie; /* retry detection */ | ||
178 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 188 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
179 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 189 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
180 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ | 190 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ |
@@ -198,7 +208,7 @@ struct rpcrdma_buffer { | |||
198 | atomic_t rb_credits; /* most recent server credits */ | 208 | atomic_t rb_credits; /* most recent server credits */ |
199 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | 209 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ |
200 | int rb_max_requests;/* client max requests */ | 210 | int rb_max_requests;/* client max requests */ |
201 | struct list_head rb_mws; /* optional memory windows/fmrs */ | 211 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
202 | int rb_send_index; | 212 | int rb_send_index; |
203 | struct rpcrdma_req **rb_send_bufs; | 213 | struct rpcrdma_req **rb_send_bufs; |
204 | int rb_recv_index; | 214 | int rb_recv_index; |
@@ -273,6 +283,11 @@ struct rpcrdma_xprt { | |||
273 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) | 283 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) |
274 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 284 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
275 | 285 | ||
286 | /* Setting this to 0 ensures interoperability with early servers. | ||
287 | * Setting this to 1 enhances certain unaligned read/write performance. | ||
288 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | ||
289 | extern int xprt_rdma_pad_optimize; | ||
290 | |||
276 | /* | 291 | /* |
277 | * Interface Adapter calls - xprtrdma/verbs.c | 292 | * Interface Adapter calls - xprtrdma/verbs.c |
278 | */ | 293 | */ |