diff options
| -rw-r--r-- | fs/nfs/client.c | 5 | ||||
| -rw-r--r-- | fs/nfs/dir.c | 20 | ||||
| -rw-r--r-- | fs/nfs/file.c | 18 | ||||
| -rw-r--r-- | fs/nfs/inode.c | 183 | ||||
| -rw-r--r-- | fs/nfs/internal.h | 25 | ||||
| -rw-r--r-- | fs/nfs/mount_clnt.c | 3 | ||||
| -rw-r--r-- | fs/nfs/namespace.c | 7 | ||||
| -rw-r--r-- | fs/nfs/nfs3acl.c | 2 | ||||
| -rw-r--r-- | fs/nfs/nfs3proc.c | 20 | ||||
| -rw-r--r-- | fs/nfs/nfs4namespace.c | 105 | ||||
| -rw-r--r-- | fs/nfs/proc.c | 10 | ||||
| -rw-r--r-- | fs/nfs/super.c | 126 | ||||
| -rw-r--r-- | fs/nfs/unlink.c | 5 | ||||
| -rw-r--r-- | fs/nfs/write.c | 3 | ||||
| -rw-r--r-- | include/linux/nfs_fs.h | 19 | ||||
| -rw-r--r-- | include/linux/nfs_fs_sb.h | 1 | ||||
| -rw-r--r-- | include/linux/nfs_mount.h | 4 | ||||
| -rw-r--r-- | include/linux/nfs_xdr.h | 11 | ||||
| -rw-r--r-- | include/linux/sunrpc/xprtrdma.h | 4 | ||||
| -rw-r--r-- | net/sunrpc/clnt.c | 4 | ||||
| -rw-r--r-- | net/sunrpc/rpcb_clnt.c | 40 | ||||
| -rw-r--r-- | net/sunrpc/xprt.c | 12 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 29 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 41 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 741 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 17 |
26 files changed, 955 insertions, 500 deletions
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5ee23e7058b3..7547600b6174 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
| @@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
| 675 | server->nfs_client = clp; | 675 | server->nfs_client = clp; |
| 676 | 676 | ||
| 677 | /* Initialise the client representation from the mount data */ | 677 | /* Initialise the client representation from the mount data */ |
| 678 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 678 | server->flags = data->flags; |
| 679 | 679 | ||
| 680 | if (data->rsize) | 680 | if (data->rsize) |
| 681 | server->rsize = nfs_block_size(data->rsize, NULL); | 681 | server->rsize = nfs_block_size(data->rsize, NULL); |
| @@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void) | |||
| 850 | INIT_LIST_HEAD(&server->client_link); | 850 | INIT_LIST_HEAD(&server->client_link); |
| 851 | INIT_LIST_HEAD(&server->master_link); | 851 | INIT_LIST_HEAD(&server->master_link); |
| 852 | 852 | ||
| 853 | init_waitqueue_head(&server->active_wq); | ||
| 854 | atomic_set(&server->active, 0); | 853 | atomic_set(&server->active, 0); |
| 855 | 854 | ||
| 856 | server->io_stats = nfs_alloc_iostats(); | 855 | server->io_stats = nfs_alloc_iostats(); |
| @@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
| 1073 | goto error; | 1072 | goto error; |
| 1074 | 1073 | ||
| 1075 | /* Initialise the client representation from the mount data */ | 1074 | /* Initialise the client representation from the mount data */ |
| 1076 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1075 | server->flags = data->flags; |
| 1077 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1076 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
| 1078 | 1077 | ||
| 1079 | if (data->rsize) | 1078 | if (data->rsize) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 74f92b717f78..2ab70d46ecbc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -156,6 +156,7 @@ typedef struct { | |||
| 156 | decode_dirent_t decode; | 156 | decode_dirent_t decode; |
| 157 | int plus; | 157 | int plus; |
| 158 | unsigned long timestamp; | 158 | unsigned long timestamp; |
| 159 | unsigned long gencount; | ||
| 159 | int timestamp_valid; | 160 | int timestamp_valid; |
| 160 | } nfs_readdir_descriptor_t; | 161 | } nfs_readdir_descriptor_t; |
| 161 | 162 | ||
| @@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 177 | struct file *file = desc->file; | 178 | struct file *file = desc->file; |
| 178 | struct inode *inode = file->f_path.dentry->d_inode; | 179 | struct inode *inode = file->f_path.dentry->d_inode; |
| 179 | struct rpc_cred *cred = nfs_file_cred(file); | 180 | struct rpc_cred *cred = nfs_file_cred(file); |
| 180 | unsigned long timestamp; | 181 | unsigned long timestamp, gencount; |
| 181 | int error; | 182 | int error; |
| 182 | 183 | ||
| 183 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", | 184 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", |
| @@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 186 | 187 | ||
| 187 | again: | 188 | again: |
| 188 | timestamp = jiffies; | 189 | timestamp = jiffies; |
| 190 | gencount = nfs_inc_attr_generation_counter(); | ||
| 189 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, | 191 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, |
| 190 | NFS_SERVER(inode)->dtsize, desc->plus); | 192 | NFS_SERVER(inode)->dtsize, desc->plus); |
| 191 | if (error < 0) { | 193 | if (error < 0) { |
| @@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 199 | goto error; | 201 | goto error; |
| 200 | } | 202 | } |
| 201 | desc->timestamp = timestamp; | 203 | desc->timestamp = timestamp; |
| 204 | desc->gencount = gencount; | ||
| 202 | desc->timestamp_valid = 1; | 205 | desc->timestamp_valid = 1; |
| 203 | SetPageUptodate(page); | 206 | SetPageUptodate(page); |
| 204 | /* Ensure consistent page alignment of the data. | 207 | /* Ensure consistent page alignment of the data. |
| @@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) | |||
| 224 | if (IS_ERR(p)) | 227 | if (IS_ERR(p)) |
| 225 | return PTR_ERR(p); | 228 | return PTR_ERR(p); |
| 226 | desc->ptr = p; | 229 | desc->ptr = p; |
| 227 | if (desc->timestamp_valid) | 230 | if (desc->timestamp_valid) { |
| 228 | desc->entry->fattr->time_start = desc->timestamp; | 231 | desc->entry->fattr->time_start = desc->timestamp; |
| 229 | else | 232 | desc->entry->fattr->gencount = desc->gencount; |
| 233 | } else | ||
| 230 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; | 234 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; |
| 231 | return 0; | 235 | return 0; |
| 232 | } | 236 | } |
| @@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 471 | struct rpc_cred *cred = nfs_file_cred(file); | 475 | struct rpc_cred *cred = nfs_file_cred(file); |
| 472 | struct page *page = NULL; | 476 | struct page *page = NULL; |
| 473 | int status; | 477 | int status; |
| 474 | unsigned long timestamp; | 478 | unsigned long timestamp, gencount; |
| 475 | 479 | ||
| 476 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 480 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
| 477 | (unsigned long long)*desc->dir_cookie); | 481 | (unsigned long long)*desc->dir_cookie); |
| @@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 482 | goto out; | 486 | goto out; |
| 483 | } | 487 | } |
| 484 | timestamp = jiffies; | 488 | timestamp = jiffies; |
| 489 | gencount = nfs_inc_attr_generation_counter(); | ||
| 485 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, | 490 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, |
| 486 | *desc->dir_cookie, page, | 491 | *desc->dir_cookie, page, |
| 487 | NFS_SERVER(inode)->dtsize, | 492 | NFS_SERVER(inode)->dtsize, |
| @@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 490 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 495 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
| 491 | if (status >= 0) { | 496 | if (status >= 0) { |
| 492 | desc->timestamp = timestamp; | 497 | desc->timestamp = timestamp; |
| 498 | desc->gencount = gencount; | ||
| 493 | desc->timestamp_valid = 1; | 499 | desc->timestamp_valid = 1; |
| 494 | if ((status = dir_decode(desc)) == 0) | 500 | if ((status = dir_decode(desc)) == 0) |
| 495 | desc->entry->prev_cookie = *desc->dir_cookie; | 501 | desc->entry->prev_cookie = *desc->dir_cookie; |
| @@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
| 655 | */ | 661 | */ |
| 656 | void nfs_force_lookup_revalidate(struct inode *dir) | 662 | void nfs_force_lookup_revalidate(struct inode *dir) |
| 657 | { | 663 | { |
| 658 | NFS_I(dir)->cache_change_attribute = jiffies; | 664 | NFS_I(dir)->cache_change_attribute++; |
| 659 | } | 665 | } |
| 660 | 666 | ||
| 661 | /* | 667 | /* |
| @@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | |||
| 667 | { | 673 | { |
| 668 | if (IS_ROOT(dentry)) | 674 | if (IS_ROOT(dentry)) |
| 669 | return 1; | 675 | return 1; |
| 676 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | ||
| 677 | return 0; | ||
| 670 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) | 678 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) |
| 671 | return 0; | 679 | return 0; |
| 672 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ | 680 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ |
| @@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | |||
| 750 | /* Don't revalidate a negative dentry if we're creating a new file */ | 758 | /* Don't revalidate a negative dentry if we're creating a new file */ |
| 751 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) | 759 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) |
| 752 | return 0; | 760 | return 0; |
| 761 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) | ||
| 762 | return 1; | ||
| 753 | return !nfs_check_verifier(dir, dentry); | 763 | return !nfs_check_verifier(dir, dentry); |
| 754 | } | 764 | } |
| 755 | 765 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78460657f5cb..d319b49f8f06 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
| 188 | /* origin == SEEK_END => we must revalidate the cached file length */ | 188 | /* origin == SEEK_END => we must revalidate the cached file length */ |
| 189 | if (origin == SEEK_END) { | 189 | if (origin == SEEK_END) { |
| 190 | struct inode *inode = filp->f_mapping->host; | 190 | struct inode *inode = filp->f_mapping->host; |
| 191 | |||
| 191 | int retval = nfs_revalidate_file_size(inode, filp); | 192 | int retval = nfs_revalidate_file_size(inode, filp); |
| 192 | if (retval < 0) | 193 | if (retval < 0) |
| 193 | return (loff_t)retval; | 194 | return (loff_t)retval; |
| 194 | } | 195 | |
| 195 | lock_kernel(); /* BKL needed? */ | 196 | spin_lock(&inode->i_lock); |
| 196 | loff = generic_file_llseek_unlocked(filp, offset, origin); | 197 | loff = generic_file_llseek_unlocked(filp, offset, origin); |
| 197 | unlock_kernel(); | 198 | spin_unlock(&inode->i_lock); |
| 199 | } else | ||
| 200 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
| 198 | return loff; | 201 | return loff; |
| 199 | } | 202 | } |
| 200 | 203 | ||
| @@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 699 | filp->f_path.dentry->d_name.name, | 702 | filp->f_path.dentry->d_name.name, |
| 700 | fl->fl_type, fl->fl_flags); | 703 | fl->fl_type, fl->fl_flags); |
| 701 | 704 | ||
| 702 | /* | ||
| 703 | * No BSD flocks over NFS allowed. | ||
| 704 | * Note: we could try to fake a POSIX lock request here by | ||
| 705 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
| 706 | * Not sure whether that would be unique, though, or whether | ||
| 707 | * that would break in other places. | ||
| 708 | */ | ||
| 709 | if (!(fl->fl_flags & FL_FLOCK)) | 705 | if (!(fl->fl_flags & FL_FLOCK)) |
| 710 | return -ENOLCK; | 706 | return -ENOLCK; |
| 711 | 707 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52daefa2f521..b9195c02a863 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
| 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); | 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); |
| 306 | 306 | ||
| 307 | nfsi->read_cache_jiffies = fattr->time_start; | 307 | nfsi->read_cache_jiffies = fattr->time_start; |
| 308 | nfsi->last_updated = now; | 308 | nfsi->attr_gencount = fattr->gencount; |
| 309 | nfsi->cache_change_attribute = now; | ||
| 310 | inode->i_atime = fattr->atime; | 309 | inode->i_atime = fattr->atime; |
| 311 | inode->i_mtime = fattr->mtime; | 310 | inode->i_mtime = fattr->mtime; |
| 312 | inode->i_ctime = fattr->ctime; | 311 | inode->i_ctime = fattr->ctime; |
| @@ -453,6 +452,7 @@ out_big: | |||
| 453 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | 452 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) |
| 454 | { | 453 | { |
| 455 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { | 454 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { |
| 455 | spin_lock(&inode->i_lock); | ||
| 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { | 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { |
| 457 | int mode = attr->ia_mode & S_IALLUGO; | 457 | int mode = attr->ia_mode & S_IALLUGO; |
| 458 | mode |= inode->i_mode & ~S_IALLUGO; | 458 | mode |= inode->i_mode & ~S_IALLUGO; |
| @@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 462 | inode->i_uid = attr->ia_uid; | 462 | inode->i_uid = attr->ia_uid; |
| 463 | if ((attr->ia_valid & ATTR_GID) != 0) | 463 | if ((attr->ia_valid & ATTR_GID) != 0) |
| 464 | inode->i_gid = attr->ia_gid; | 464 | inode->i_gid = attr->ia_gid; |
| 465 | spin_lock(&inode->i_lock); | ||
| 466 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 465 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 467 | spin_unlock(&inode->i_lock); | 466 | spin_unlock(&inode->i_lock); |
| 468 | } | 467 | } |
| @@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 472 | } | 471 | } |
| 473 | } | 472 | } |
| 474 | 473 | ||
| 475 | static int nfs_wait_schedule(void *word) | ||
| 476 | { | ||
| 477 | if (signal_pending(current)) | ||
| 478 | return -ERESTARTSYS; | ||
| 479 | schedule(); | ||
| 480 | return 0; | ||
| 481 | } | ||
| 482 | |||
| 483 | /* | ||
| 484 | * Wait for the inode to get unlocked. | ||
| 485 | */ | ||
| 486 | static int nfs_wait_on_inode(struct inode *inode) | ||
| 487 | { | ||
| 488 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 489 | int error; | ||
| 490 | |||
| 491 | error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, | ||
| 492 | nfs_wait_schedule, TASK_KILLABLE); | ||
| 493 | |||
| 494 | return error; | ||
| 495 | } | ||
| 496 | |||
| 497 | static void nfs_wake_up_inode(struct inode *inode) | ||
| 498 | { | ||
| 499 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 500 | |||
| 501 | clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); | ||
| 502 | smp_mb__after_clear_bit(); | ||
| 503 | wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); | ||
| 504 | } | ||
| 505 | |||
| 506 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 474 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
| 507 | { | 475 | { |
| 508 | struct inode *inode = dentry->d_inode; | 476 | struct inode *inode = dentry->d_inode; |
| @@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 697 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", | 665 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", |
| 698 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 666 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
| 699 | 667 | ||
| 700 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
| 701 | if (is_bad_inode(inode)) | 668 | if (is_bad_inode(inode)) |
| 702 | goto out_nowait; | 669 | goto out; |
| 703 | if (NFS_STALE(inode)) | 670 | if (NFS_STALE(inode)) |
| 704 | goto out_nowait; | ||
| 705 | |||
| 706 | status = nfs_wait_on_inode(inode); | ||
| 707 | if (status < 0) | ||
| 708 | goto out; | 671 | goto out; |
| 709 | 672 | ||
| 710 | status = -ESTALE; | ||
| 711 | if (NFS_STALE(inode)) | 673 | if (NFS_STALE(inode)) |
| 712 | goto out; | 674 | goto out; |
| 713 | 675 | ||
| 676 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
| 714 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); | 677 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); |
| 715 | if (status != 0) { | 678 | if (status != 0) { |
| 716 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 679 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
| @@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 724 | goto out; | 687 | goto out; |
| 725 | } | 688 | } |
| 726 | 689 | ||
| 727 | spin_lock(&inode->i_lock); | 690 | status = nfs_refresh_inode(inode, &fattr); |
| 728 | status = nfs_update_inode(inode, &fattr); | ||
| 729 | if (status) { | 691 | if (status) { |
| 730 | spin_unlock(&inode->i_lock); | ||
| 731 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 692 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
| 732 | inode->i_sb->s_id, | 693 | inode->i_sb->s_id, |
| 733 | (long long)NFS_FILEID(inode), status); | 694 | (long long)NFS_FILEID(inode), status); |
| 734 | goto out; | 695 | goto out; |
| 735 | } | 696 | } |
| 736 | spin_unlock(&inode->i_lock); | ||
| 737 | 697 | ||
| 738 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 698 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
| 739 | nfs_zap_acl_cache(inode); | 699 | nfs_zap_acl_cache(inode); |
| @@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 743 | (long long)NFS_FILEID(inode)); | 703 | (long long)NFS_FILEID(inode)); |
| 744 | 704 | ||
| 745 | out: | 705 | out: |
| 746 | nfs_wake_up_inode(inode); | ||
| 747 | |||
| 748 | out_nowait: | ||
| 749 | return status; | 706 | return status; |
| 750 | } | 707 | } |
| 751 | 708 | ||
| @@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 908 | return -EIO; | 865 | return -EIO; |
| 909 | } | 866 | } |
| 910 | 867 | ||
| 911 | /* Do atomic weak cache consistency updates */ | ||
| 912 | nfs_wcc_update_inode(inode, fattr); | ||
| 913 | |||
| 914 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 868 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
| 915 | nfsi->change_attr != fattr->change_attr) | 869 | nfsi->change_attr != fattr->change_attr) |
| 916 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 870 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
| @@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 939 | 893 | ||
| 940 | if (invalid != 0) | 894 | if (invalid != 0) |
| 941 | nfsi->cache_validity |= invalid; | 895 | nfsi->cache_validity |= invalid; |
| 942 | else | ||
| 943 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | ||
| 944 | | NFS_INO_INVALID_ATIME | ||
| 945 | | NFS_INO_REVAL_PAGECACHE); | ||
| 946 | 896 | ||
| 947 | nfsi->read_cache_jiffies = fattr->time_start; | 897 | nfsi->read_cache_jiffies = fattr->time_start; |
| 948 | return 0; | 898 | return 0; |
| 949 | } | 899 | } |
| 950 | 900 | ||
| 901 | static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 902 | { | ||
| 903 | return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; | ||
| 904 | } | ||
| 905 | |||
| 906 | static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 907 | { | ||
| 908 | return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); | ||
| 909 | } | ||
| 910 | |||
| 911 | static unsigned long nfs_attr_generation_counter; | ||
| 912 | |||
| 913 | static unsigned long nfs_read_attr_generation_counter(void) | ||
| 914 | { | ||
| 915 | smp_rmb(); | ||
| 916 | return nfs_attr_generation_counter; | ||
| 917 | } | ||
| 918 | |||
| 919 | unsigned long nfs_inc_attr_generation_counter(void) | ||
| 920 | { | ||
| 921 | unsigned long ret; | ||
| 922 | smp_rmb(); | ||
| 923 | ret = ++nfs_attr_generation_counter; | ||
| 924 | smp_wmb(); | ||
| 925 | return ret; | ||
| 926 | } | ||
| 927 | |||
| 928 | void nfs_fattr_init(struct nfs_fattr *fattr) | ||
| 929 | { | ||
| 930 | fattr->valid = 0; | ||
| 931 | fattr->time_start = jiffies; | ||
| 932 | fattr->gencount = nfs_inc_attr_generation_counter(); | ||
| 933 | } | ||
| 934 | |||
| 935 | /** | ||
| 936 | * nfs_inode_attrs_need_update - check if the inode attributes need updating | ||
| 937 | * @inode - pointer to inode | ||
| 938 | * @fattr - attributes | ||
| 939 | * | ||
| 940 | * Attempt to divine whether or not an RPC call reply carrying stale | ||
| 941 | * attributes got scheduled after another call carrying updated ones. | ||
| 942 | * | ||
| 943 | * To do so, the function first assumes that a more recent ctime means | ||
| 944 | * that the attributes in fattr are newer, however it also attempts to | ||
| 945 | * catch the case where ctime either didn't change, or went backwards | ||
| 946 | * (if someone reset the clock on the server) by looking at whether | ||
| 947 | * or not this RPC call was started after the inode was last updated. | ||
| 948 | * Note also the check for wraparound of 'attr_gencount' | ||
| 949 | * | ||
| 950 | * The function returns 'true' if it thinks the attributes in 'fattr' are | ||
| 951 | * more recent than the ones cached in the inode. | ||
| 952 | * | ||
| 953 | */ | ||
| 954 | static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 955 | { | ||
| 956 | const struct nfs_inode *nfsi = NFS_I(inode); | ||
| 957 | |||
| 958 | return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || | ||
| 959 | nfs_ctime_need_update(inode, fattr) || | ||
| 960 | nfs_size_need_update(inode, fattr) || | ||
| 961 | ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); | ||
| 962 | } | ||
| 963 | |||
| 964 | static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
| 965 | { | ||
| 966 | if (nfs_inode_attrs_need_update(inode, fattr)) | ||
| 967 | return nfs_update_inode(inode, fattr); | ||
| 968 | return nfs_check_inode_attributes(inode, fattr); | ||
| 969 | } | ||
| 970 | |||
| 951 | /** | 971 | /** |
| 952 | * nfs_refresh_inode - try to update the inode attribute cache | 972 | * nfs_refresh_inode - try to update the inode attribute cache |
| 953 | * @inode - pointer to inode | 973 | * @inode - pointer to inode |
| @@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 960 | */ | 980 | */ |
| 961 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | 981 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) |
| 962 | { | 982 | { |
| 963 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 964 | int status; | 983 | int status; |
| 965 | 984 | ||
| 966 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 985 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) |
| 967 | return 0; | 986 | return 0; |
| 968 | spin_lock(&inode->i_lock); | 987 | spin_lock(&inode->i_lock); |
| 969 | if (time_after(fattr->time_start, nfsi->last_updated)) | 988 | status = nfs_refresh_inode_locked(inode, fattr); |
| 970 | status = nfs_update_inode(inode, fattr); | ||
| 971 | else | ||
| 972 | status = nfs_check_inode_attributes(inode, fattr); | ||
| 973 | |||
| 974 | spin_unlock(&inode->i_lock); | 989 | spin_unlock(&inode->i_lock); |
| 975 | return status; | 990 | return status; |
| 976 | } | 991 | } |
| 977 | 992 | ||
| 993 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
| 994 | { | ||
| 995 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 996 | |||
| 997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | ||
| 998 | if (S_ISDIR(inode->i_mode)) | ||
| 999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
| 1000 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | ||
| 1001 | return 0; | ||
| 1002 | return nfs_refresh_inode_locked(inode, fattr); | ||
| 1003 | } | ||
| 1004 | |||
| 978 | /** | 1005 | /** |
| 979 | * nfs_post_op_update_inode - try to update the inode attribute cache | 1006 | * nfs_post_op_update_inode - try to update the inode attribute cache |
| 980 | * @inode - pointer to inode | 1007 | * @inode - pointer to inode |
| @@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 991 | */ | 1018 | */ |
| 992 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 1019 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
| 993 | { | 1020 | { |
| 994 | struct nfs_inode *nfsi = NFS_I(inode); | 1021 | int status; |
| 995 | 1022 | ||
| 996 | spin_lock(&inode->i_lock); | 1023 | spin_lock(&inode->i_lock); |
| 997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1024 | status = nfs_post_op_update_inode_locked(inode, fattr); |
| 998 | if (S_ISDIR(inode->i_mode)) | ||
| 999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
| 1000 | spin_unlock(&inode->i_lock); | 1025 | spin_unlock(&inode->i_lock); |
| 1001 | return nfs_refresh_inode(inode, fattr); | 1026 | return status; |
| 1002 | } | 1027 | } |
| 1003 | 1028 | ||
| 1004 | /** | 1029 | /** |
| @@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1014 | */ | 1039 | */ |
| 1015 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) | 1040 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) |
| 1016 | { | 1041 | { |
| 1042 | int status; | ||
| 1043 | |||
| 1044 | spin_lock(&inode->i_lock); | ||
| 1045 | /* Don't do a WCC update if these attributes are already stale */ | ||
| 1046 | if ((fattr->valid & NFS_ATTR_FATTR) == 0 || | ||
| 1047 | !nfs_inode_attrs_need_update(inode, fattr)) { | ||
| 1048 | fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); | ||
| 1049 | goto out_noforce; | ||
| 1050 | } | ||
| 1017 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 1051 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
| 1018 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { | 1052 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { |
| 1019 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | 1053 | fattr->pre_change_attr = NFS_I(inode)->change_attr; |
| @@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
| 1026 | fattr->pre_size = i_size_read(inode); | 1060 | fattr->pre_size = i_size_read(inode); |
| 1027 | fattr->valid |= NFS_ATTR_WCC; | 1061 | fattr->valid |= NFS_ATTR_WCC; |
| 1028 | } | 1062 | } |
| 1029 | return nfs_post_op_update_inode(inode, fattr); | 1063 | out_noforce: |
| 1064 | status = nfs_post_op_update_inode_locked(inode, fattr); | ||
| 1065 | spin_unlock(&inode->i_lock); | ||
| 1066 | return status; | ||
| 1030 | } | 1067 | } |
| 1031 | 1068 | ||
| 1032 | /* | 1069 | /* |
| @@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1092 | } | 1129 | } |
| 1093 | /* If ctime has changed we should definitely clear access+acl caches */ | 1130 | /* If ctime has changed we should definitely clear access+acl caches */ |
| 1094 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | 1131 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) |
| 1095 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1132 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 1096 | } else if (nfsi->change_attr != fattr->change_attr) { | 1133 | } else if (nfsi->change_attr != fattr->change_attr) { |
| 1097 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1134 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
| 1098 | inode->i_sb->s_id, inode->i_ino); | 1135 | inode->i_sb->s_id, inode->i_ino); |
| @@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1126 | inode->i_gid != fattr->gid) | 1163 | inode->i_gid != fattr->gid) |
| 1127 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1164 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 1128 | 1165 | ||
| 1166 | if (inode->i_nlink != fattr->nlink) | ||
| 1167 | invalid |= NFS_INO_INVALID_ATTR; | ||
| 1168 | |||
| 1129 | inode->i_mode = fattr->mode; | 1169 | inode->i_mode = fattr->mode; |
| 1130 | inode->i_nlink = fattr->nlink; | 1170 | inode->i_nlink = fattr->nlink; |
| 1131 | inode->i_uid = fattr->uid; | 1171 | inode->i_uid = fattr->uid; |
| @@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1145 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1185 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
| 1146 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1186 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
| 1147 | nfsi->attrtimeo_timestamp = now; | 1187 | nfsi->attrtimeo_timestamp = now; |
| 1148 | nfsi->last_updated = now; | 1188 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); |
| 1149 | } else { | 1189 | } else { |
| 1150 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { | 1190 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
| 1151 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1191 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) |
| 1152 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1192 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); |
| 1153 | nfsi->attrtimeo_timestamp = now; | 1193 | nfsi->attrtimeo_timestamp = now; |
| 1154 | } | 1194 | } |
| 1155 | /* | ||
| 1156 | * Avoid jiffy wraparound issues with nfsi->last_updated | ||
| 1157 | */ | ||
| 1158 | if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) | ||
| 1159 | nfsi->last_updated = nfsi->read_cache_jiffies; | ||
| 1160 | } | 1195 | } |
| 1161 | invalid &= ~NFS_INO_INVALID_ATTR; | 1196 | invalid &= ~NFS_INO_INVALID_ATTR; |
| 1162 | /* Don't invalidate the data if we were to blame */ | 1197 | /* Don't invalidate the data if we were to blame */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24241fcbb98d..d212ee41caf2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *); | |||
| 153 | void nfs_zap_acl_cache(struct inode *inode); | 153 | void nfs_zap_acl_cache(struct inode *inode); |
| 154 | 154 | ||
| 155 | /* super.c */ | 155 | /* super.c */ |
| 156 | void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); | ||
| 156 | extern struct file_system_type nfs_xdev_fs_type; | 157 | extern struct file_system_type nfs_xdev_fs_type; |
| 157 | #ifdef CONFIG_NFS_V4 | 158 | #ifdef CONFIG_NFS_V4 |
| 158 | extern struct file_system_type nfs4_xdev_fs_type; | 159 | extern struct file_system_type nfs4_xdev_fs_type; |
| @@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat; | |||
| 163 | 164 | ||
| 164 | extern int __init register_nfs_fs(void); | 165 | extern int __init register_nfs_fs(void); |
| 165 | extern void __exit unregister_nfs_fs(void); | 166 | extern void __exit unregister_nfs_fs(void); |
| 166 | extern void nfs_sb_active(struct nfs_server *server); | 167 | extern void nfs_sb_active(struct super_block *sb); |
| 167 | extern void nfs_sb_deactive(struct nfs_server *server); | 168 | extern void nfs_sb_deactive(struct super_block *sb); |
| 168 | 169 | ||
| 169 | /* namespace.c */ | 170 | /* namespace.c */ |
| 170 | extern char *nfs_path(const char *base, | 171 | extern char *nfs_path(const char *base, |
| @@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
| 276 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 277 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 277 | } | 278 | } |
| 278 | 279 | ||
| 280 | #define IPV6_SCOPE_DELIMITER '%' | ||
| 281 | |||
| 282 | /* | ||
| 283 | * Set the port number in an address. Be agnostic about the address | ||
| 284 | * family. | ||
| 285 | */ | ||
| 286 | static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
| 287 | { | ||
| 288 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
| 289 | struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; | ||
| 290 | |||
| 291 | switch (sap->sa_family) { | ||
| 292 | case AF_INET: | ||
| 293 | ap->sin_port = htons(port); | ||
| 294 | break; | ||
| 295 | case AF_INET6: | ||
| 296 | ap6->sin6_port = htons(port); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | } | ||
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 779d2eb649c5..086a6830d785 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sunrpc/sched.h> | 15 | #include <linux/sunrpc/sched.h> |
| 16 | #include <linux/nfs_fs.h> | 16 | #include <linux/nfs_fs.h> |
| 17 | #include "internal.h" | ||
| 17 | 18 | ||
| 18 | #ifdef RPC_DEBUG | 19 | #ifdef RPC_DEBUG |
| 19 | # define NFSDBG_FACILITY NFSDBG_MOUNT | 20 | # define NFSDBG_FACILITY NFSDBG_MOUNT |
| @@ -98,7 +99,7 @@ out_call_err: | |||
| 98 | 99 | ||
| 99 | out_mnt_err: | 100 | out_mnt_err: |
| 100 | dprintk("NFS: MNT server returned result %d\n", result.status); | 101 | dprintk("NFS: MNT server returned result %d\n", result.status); |
| 101 | status = -EACCES; | 102 | status = nfs_stat_to_errno(result.status); |
| 102 | goto out; | 103 | goto out; |
| 103 | } | 104 | } |
| 104 | 105 | ||
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 66df08dd1caf..64a288ee046d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
| @@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
| 105 | 105 | ||
| 106 | dprintk("--> nfs_follow_mountpoint()\n"); | 106 | dprintk("--> nfs_follow_mountpoint()\n"); |
| 107 | 107 | ||
| 108 | BUG_ON(IS_ROOT(dentry)); | 108 | err = -ESTALE; |
| 109 | if (IS_ROOT(dentry)) | ||
| 110 | goto out_err; | ||
| 111 | |||
| 109 | dprintk("%s: enter\n", __func__); | 112 | dprintk("%s: enter\n", __func__); |
| 110 | dput(nd->path.dentry); | 113 | dput(nd->path.dentry); |
| 111 | nd->path.dentry = dget(dentry); | 114 | nd->path.dentry = dget(dentry); |
| @@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
| 189 | struct nfs_clone_mount *mountdata) | 192 | struct nfs_clone_mount *mountdata) |
| 190 | { | 193 | { |
| 191 | #ifdef CONFIG_NFS_V4 | 194 | #ifdef CONFIG_NFS_V4 |
| 192 | struct vfsmount *mnt = NULL; | 195 | struct vfsmount *mnt = ERR_PTR(-EINVAL); |
| 193 | switch (server->nfs_client->rpc_ops->version) { | 196 | switch (server->nfs_client->rpc_ops->version) { |
| 194 | case 2: | 197 | case 2: |
| 195 | case 3: | 198 | case 3: |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 423842f51ac9..cef62557c87d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
| @@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
| 229 | 229 | ||
| 230 | dprintk("NFS call getacl\n"); | 230 | dprintk("NFS call getacl\n"); |
| 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; | 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; |
| 232 | nfs_fattr_init(&fattr); | ||
| 232 | status = rpc_call_sync(server->client_acl, &msg, 0); | 233 | status = rpc_call_sync(server->client_acl, &msg, 0); |
| 233 | dprintk("NFS reply getacl: %d\n", status); | 234 | dprintk("NFS reply getacl: %d\n", status); |
| 234 | 235 | ||
| @@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
| 322 | 323 | ||
| 323 | dprintk("NFS call setacl\n"); | 324 | dprintk("NFS call setacl\n"); |
| 324 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 325 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
| 326 | nfs_fattr_init(&fattr); | ||
| 325 | status = rpc_call_sync(server->client_acl, &msg, 0); | 327 | status = rpc_call_sync(server->client_acl, &msg, 0); |
| 326 | nfs_access_zap_cache(inode); | 328 | nfs_access_zap_cache(inode); |
| 327 | nfs_zap_acl_cache(inode); | 329 | nfs_zap_acl_cache(inode); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1e750e4574a9..c55be7a7679e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 699 | } | 699 | } |
| 700 | 700 | ||
| 701 | static int | 701 | static int |
| 702 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | 702 | do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, |
| 703 | struct nfs_fsinfo *info) | 703 | struct nfs_fsinfo *info) |
| 704 | { | 704 | { |
| 705 | struct rpc_message msg = { | 705 | struct rpc_message msg = { |
| @@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 711 | 711 | ||
| 712 | dprintk("NFS call fsinfo\n"); | 712 | dprintk("NFS call fsinfo\n"); |
| 713 | nfs_fattr_init(info->fattr); | 713 | nfs_fattr_init(info->fattr); |
| 714 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 714 | status = rpc_call_sync(client, &msg, 0); |
| 715 | dprintk("NFS reply fsinfo: %d\n", status); | 715 | dprintk("NFS reply fsinfo: %d\n", status); |
| 716 | return status; | 716 | return status; |
| 717 | } | 717 | } |
| 718 | 718 | ||
| 719 | /* | ||
| 720 | * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via | ||
| 721 | * nfs_create_server | ||
| 722 | */ | ||
| 723 | static int | ||
| 724 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | ||
| 725 | struct nfs_fsinfo *info) | ||
| 726 | { | ||
| 727 | int status; | ||
| 728 | |||
| 729 | status = do_proc_fsinfo(server->client, fhandle, info); | ||
| 730 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 731 | status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); | ||
| 732 | return status; | ||
| 733 | } | ||
| 734 | |||
| 719 | static int | 735 | static int |
| 720 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 736 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
| 721 | struct nfs_pathconf *info) | 737 | struct nfs_pathconf *info) |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index b112857301f7..30befc39b3c6 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
| @@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, | |||
| 93 | return 0; | 93 | return 0; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | /* | 96 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
| 97 | * Check if the string represents a "valid" IPv4 address | 97 | char *page, char *page2, |
| 98 | */ | 98 | const struct nfs4_fs_location *location) |
| 99 | static inline int valid_ipaddr4(const char *buf) | ||
| 100 | { | 99 | { |
| 101 | int rc, count, in[4]; | 100 | struct vfsmount *mnt = ERR_PTR(-ENOENT); |
| 102 | 101 | char *mnt_path; | |
| 103 | rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); | 102 | int page2len; |
| 104 | if (rc != 4) | 103 | unsigned int s; |
| 105 | return -EINVAL; | 104 | |
| 106 | for (count = 0; count < 4; count++) { | 105 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); |
| 107 | if (in[count] > 255) | 106 | if (IS_ERR(mnt_path)) |
| 108 | return -EINVAL; | 107 | return mnt; |
| 108 | mountdata->mnt_path = mnt_path; | ||
| 109 | page2 += strlen(mnt_path) + 1; | ||
| 110 | page2len = PAGE_SIZE - strlen(mnt_path) - 1; | ||
| 111 | |||
| 112 | for (s = 0; s < location->nservers; s++) { | ||
| 113 | const struct nfs4_string *buf = &location->servers[s]; | ||
| 114 | struct sockaddr_storage addr; | ||
| 115 | |||
| 116 | if (buf->len <= 0 || buf->len >= PAGE_SIZE) | ||
| 117 | continue; | ||
| 118 | |||
| 119 | mountdata->addr = (struct sockaddr *)&addr; | ||
| 120 | |||
| 121 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) | ||
| 122 | continue; | ||
| 123 | nfs_parse_ip_address(buf->data, buf->len, | ||
| 124 | mountdata->addr, &mountdata->addrlen); | ||
| 125 | if (mountdata->addr->sa_family == AF_UNSPEC) | ||
| 126 | continue; | ||
| 127 | nfs_set_port(mountdata->addr, NFS_PORT); | ||
| 128 | |||
| 129 | strncpy(page2, buf->data, page2len); | ||
| 130 | page2[page2len] = '\0'; | ||
| 131 | mountdata->hostname = page2; | ||
| 132 | |||
| 133 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
| 134 | mountdata->hostname, | ||
| 135 | mountdata->mnt_path); | ||
| 136 | |||
| 137 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); | ||
| 138 | if (!IS_ERR(mnt)) | ||
| 139 | break; | ||
| 109 | } | 140 | } |
| 110 | return 0; | 141 | return mnt; |
| 111 | } | 142 | } |
| 112 | 143 | ||
| 113 | /** | 144 | /** |
| @@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
| 128 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, | 159 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, |
| 129 | }; | 160 | }; |
| 130 | char *page = NULL, *page2 = NULL; | 161 | char *page = NULL, *page2 = NULL; |
| 131 | unsigned int s; | ||
| 132 | int loc, error; | 162 | int loc, error; |
| 133 | 163 | ||
| 134 | if (locations == NULL || locations->nlocations <= 0) | 164 | if (locations == NULL || locations->nlocations <= 0) |
| @@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
| 152 | goto out; | 182 | goto out; |
| 153 | } | 183 | } |
| 154 | 184 | ||
| 155 | loc = 0; | 185 | for (loc = 0; loc < locations->nlocations; loc++) { |
| 156 | while (loc < locations->nlocations && IS_ERR(mnt)) { | ||
| 157 | const struct nfs4_fs_location *location = &locations->locations[loc]; | 186 | const struct nfs4_fs_location *location = &locations->locations[loc]; |
| 158 | char *mnt_path; | ||
| 159 | 187 | ||
| 160 | if (location == NULL || location->nservers <= 0 || | 188 | if (location == NULL || location->nservers <= 0 || |
| 161 | location->rootpath.ncomponents == 0) { | 189 | location->rootpath.ncomponents == 0) |
| 162 | loc++; | ||
| 163 | continue; | 190 | continue; |
| 164 | } | ||
| 165 | 191 | ||
| 166 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); | 192 | mnt = try_location(&mountdata, page, page2, location); |
| 167 | if (IS_ERR(mnt_path)) { | 193 | if (!IS_ERR(mnt)) |
| 168 | loc++; | 194 | break; |
| 169 | continue; | ||
| 170 | } | ||
| 171 | mountdata.mnt_path = mnt_path; | ||
| 172 | |||
| 173 | s = 0; | ||
| 174 | while (s < location->nservers) { | ||
| 175 | struct sockaddr_in addr = { | ||
| 176 | .sin_family = AF_INET, | ||
| 177 | .sin_port = htons(NFS_PORT), | ||
| 178 | }; | ||
| 179 | |||
| 180 | if (location->servers[s].len <= 0 || | ||
| 181 | valid_ipaddr4(location->servers[s].data) < 0) { | ||
| 182 | s++; | ||
| 183 | continue; | ||
| 184 | } | ||
| 185 | |||
| 186 | mountdata.hostname = location->servers[s].data; | ||
| 187 | addr.sin_addr.s_addr = in_aton(mountdata.hostname), | ||
| 188 | mountdata.addr = (struct sockaddr *)&addr; | ||
| 189 | mountdata.addrlen = sizeof(addr); | ||
| 190 | |||
| 191 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
| 192 | mountdata.hostname, | ||
| 193 | mountdata.mnt_path); | ||
| 194 | |||
| 195 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); | ||
| 196 | if (!IS_ERR(mnt)) { | ||
| 197 | break; | ||
| 198 | } | ||
| 199 | s++; | ||
| 200 | } | ||
| 201 | loc++; | ||
| 202 | } | 195 | } |
| 203 | 196 | ||
| 204 | out: | 197 | out: |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4dbb84df1b68..193465210d7c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 65 | 65 | ||
| 66 | dprintk("%s: call getattr\n", __func__); | 66 | dprintk("%s: call getattr\n", __func__); |
| 67 | nfs_fattr_init(fattr); | 67 | nfs_fattr_init(fattr); |
| 68 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 68 | status = rpc_call_sync(server->client, &msg, 0); |
| 69 | /* Retry with default authentication if different */ | ||
| 70 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 71 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
| 69 | dprintk("%s: reply getattr: %d\n", __func__, status); | 72 | dprintk("%s: reply getattr: %d\n", __func__, status); |
| 70 | if (status) | 73 | if (status) |
| 71 | return status; | 74 | return status; |
| 72 | dprintk("%s: call statfs\n", __func__); | 75 | dprintk("%s: call statfs\n", __func__); |
| 73 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; | 76 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; |
| 74 | msg.rpc_resp = &fsinfo; | 77 | msg.rpc_resp = &fsinfo; |
| 75 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 78 | status = rpc_call_sync(server->client, &msg, 0); |
| 79 | /* Retry with default authentication if different */ | ||
| 80 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 81 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
| 76 | dprintk("%s: reply statfs: %d\n", __func__, status); | 82 | dprintk("%s: reply statfs: %d\n", __func__, status); |
| 77 | if (status) | 83 | if (status) |
| 78 | return status; | 84 | return status; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ffb697416cb1..8b28b95c9e44 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -91,6 +91,7 @@ enum { | |||
| 91 | /* Mount options that take string arguments */ | 91 | /* Mount options that take string arguments */ |
| 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
| 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
| 94 | Opt_lookupcache, | ||
| 94 | 95 | ||
| 95 | /* Special mount options */ | 96 | /* Special mount options */ |
| 96 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 97 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
| @@ -154,6 +155,8 @@ static const match_table_t nfs_mount_option_tokens = { | |||
| 154 | { Opt_mounthost, "mounthost=%s" }, | 155 | { Opt_mounthost, "mounthost=%s" }, |
| 155 | { Opt_mountaddr, "mountaddr=%s" }, | 156 | { Opt_mountaddr, "mountaddr=%s" }, |
| 156 | 157 | ||
| 158 | { Opt_lookupcache, "lookupcache=%s" }, | ||
| 159 | |||
| 157 | { Opt_err, NULL } | 160 | { Opt_err, NULL } |
| 158 | }; | 161 | }; |
| 159 | 162 | ||
| @@ -200,6 +203,22 @@ static const match_table_t nfs_secflavor_tokens = { | |||
| 200 | { Opt_sec_err, NULL } | 203 | { Opt_sec_err, NULL } |
| 201 | }; | 204 | }; |
| 202 | 205 | ||
| 206 | enum { | ||
| 207 | Opt_lookupcache_all, Opt_lookupcache_positive, | ||
| 208 | Opt_lookupcache_none, | ||
| 209 | |||
| 210 | Opt_lookupcache_err | ||
| 211 | }; | ||
| 212 | |||
| 213 | static match_table_t nfs_lookupcache_tokens = { | ||
| 214 | { Opt_lookupcache_all, "all" }, | ||
| 215 | { Opt_lookupcache_positive, "pos" }, | ||
| 216 | { Opt_lookupcache_positive, "positive" }, | ||
| 217 | { Opt_lookupcache_none, "none" }, | ||
| 218 | |||
| 219 | { Opt_lookupcache_err, NULL } | ||
| 220 | }; | ||
| 221 | |||
| 203 | 222 | ||
| 204 | static void nfs_umount_begin(struct super_block *); | 223 | static void nfs_umount_begin(struct super_block *); |
| 205 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 224 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
| @@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru | |||
| 209 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, | 228 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, |
| 210 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 229 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
| 211 | static void nfs_kill_super(struct super_block *); | 230 | static void nfs_kill_super(struct super_block *); |
| 212 | static void nfs_put_super(struct super_block *); | ||
| 213 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | 231 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); |
| 214 | 232 | ||
| 215 | static struct file_system_type nfs_fs_type = { | 233 | static struct file_system_type nfs_fs_type = { |
| @@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = { | |||
| 232 | .alloc_inode = nfs_alloc_inode, | 250 | .alloc_inode = nfs_alloc_inode, |
| 233 | .destroy_inode = nfs_destroy_inode, | 251 | .destroy_inode = nfs_destroy_inode, |
| 234 | .write_inode = nfs_write_inode, | 252 | .write_inode = nfs_write_inode, |
| 235 | .put_super = nfs_put_super, | ||
| 236 | .statfs = nfs_statfs, | 253 | .statfs = nfs_statfs, |
| 237 | .clear_inode = nfs_clear_inode, | 254 | .clear_inode = nfs_clear_inode, |
| 238 | .umount_begin = nfs_umount_begin, | 255 | .umount_begin = nfs_umount_begin, |
| @@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void) | |||
| 337 | unregister_filesystem(&nfs_fs_type); | 354 | unregister_filesystem(&nfs_fs_type); |
| 338 | } | 355 | } |
| 339 | 356 | ||
| 340 | void nfs_sb_active(struct nfs_server *server) | 357 | void nfs_sb_active(struct super_block *sb) |
| 341 | { | 358 | { |
| 342 | atomic_inc(&server->active); | 359 | struct nfs_server *server = NFS_SB(sb); |
| 343 | } | ||
| 344 | 360 | ||
| 345 | void nfs_sb_deactive(struct nfs_server *server) | 361 | if (atomic_inc_return(&server->active) == 1) |
| 346 | { | 362 | atomic_inc(&sb->s_active); |
| 347 | if (atomic_dec_and_test(&server->active)) | ||
| 348 | wake_up(&server->active_wq); | ||
| 349 | } | 363 | } |
| 350 | 364 | ||
| 351 | static void nfs_put_super(struct super_block *sb) | 365 | void nfs_sb_deactive(struct super_block *sb) |
| 352 | { | 366 | { |
| 353 | struct nfs_server *server = NFS_SB(sb); | 367 | struct nfs_server *server = NFS_SB(sb); |
| 354 | /* | 368 | |
| 355 | * Make sure there are no outstanding ops to this server. | 369 | if (atomic_dec_and_test(&server->active)) |
| 356 | * If so, wait for them to finish before allowing the | 370 | deactivate_super(sb); |
| 357 | * unmount to continue. | ||
| 358 | */ | ||
| 359 | wait_event(server->active_wq, atomic_read(&server->active) == 0); | ||
| 360 | } | 371 | } |
| 361 | 372 | ||
| 362 | /* | 373 | /* |
| @@ -664,25 +675,6 @@ static void nfs_umount_begin(struct super_block *sb) | |||
| 664 | } | 675 | } |
| 665 | 676 | ||
| 666 | /* | 677 | /* |
| 667 | * Set the port number in an address. Be agnostic about the address family. | ||
| 668 | */ | ||
| 669 | static void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
| 670 | { | ||
| 671 | switch (sap->sa_family) { | ||
| 672 | case AF_INET: { | ||
| 673 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
| 674 | ap->sin_port = htons(port); | ||
| 675 | break; | ||
| 676 | } | ||
| 677 | case AF_INET6: { | ||
| 678 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
| 679 | ap->sin6_port = htons(port); | ||
| 680 | break; | ||
| 681 | } | ||
| 682 | } | ||
| 683 | } | ||
| 684 | |||
| 685 | /* | ||
| 686 | * Sanity-check a server address provided by the mount command. | 678 | * Sanity-check a server address provided by the mount command. |
| 687 | * | 679 | * |
| 688 | * Address family must be initialized, and address must not be | 680 | * Address family must be initialized, and address must not be |
| @@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, | |||
| 724 | *addr_len = 0; | 716 | *addr_len = 0; |
| 725 | } | 717 | } |
| 726 | 718 | ||
| 727 | #define IPV6_SCOPE_DELIMITER '%' | ||
| 728 | |||
| 729 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 719 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 730 | static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | 720 | static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, |
| 731 | const char *delim, | 721 | const char *delim, |
| 732 | struct sockaddr_in6 *sin6) | 722 | struct sockaddr_in6 *sin6) |
| 733 | { | 723 | { |
| 734 | char *p; | 724 | char *p; |
| 735 | size_t len; | 725 | size_t len; |
| 736 | 726 | ||
| 737 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | 727 | if ((string + str_len) == delim) |
| 738 | return ; | 728 | return 1; |
| 729 | |||
| 739 | if (*delim != IPV6_SCOPE_DELIMITER) | 730 | if (*delim != IPV6_SCOPE_DELIMITER) |
| 740 | return; | 731 | return 0; |
| 732 | |||
| 733 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
| 734 | return 0; | ||
| 741 | 735 | ||
| 742 | len = (string + str_len) - delim - 1; | 736 | len = (string + str_len) - delim - 1; |
| 743 | p = kstrndup(delim + 1, len, GFP_KERNEL); | 737 | p = kstrndup(delim + 1, len, GFP_KERNEL); |
| @@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | |||
| 750 | scope_id = dev->ifindex; | 744 | scope_id = dev->ifindex; |
| 751 | dev_put(dev); | 745 | dev_put(dev); |
| 752 | } else { | 746 | } else { |
| 753 | /* scope_id is set to zero on error */ | 747 | if (strict_strtoul(p, 10, &scope_id) == 0) { |
| 754 | strict_strtoul(p, 10, &scope_id); | 748 | kfree(p); |
| 749 | return 0; | ||
| 750 | } | ||
| 755 | } | 751 | } |
| 756 | 752 | ||
| 757 | kfree(p); | 753 | kfree(p); |
| 754 | |||
| 758 | sin6->sin6_scope_id = scope_id; | 755 | sin6->sin6_scope_id = scope_id; |
| 759 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | 756 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); |
| 757 | return 1; | ||
| 760 | } | 758 | } |
| 759 | |||
| 760 | return 0; | ||
| 761 | } | 761 | } |
| 762 | 762 | ||
| 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, |
| @@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
| 773 | 773 | ||
| 774 | sin6->sin6_family = AF_INET6; | 774 | sin6->sin6_family = AF_INET6; |
| 775 | *addr_len = sizeof(*sin6); | 775 | *addr_len = sizeof(*sin6); |
| 776 | if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { | 776 | if (in6_pton(string, str_len, addr, |
| 777 | nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); | 777 | IPV6_SCOPE_DELIMITER, &delim) != 0) { |
| 778 | return; | 778 | if (nfs_parse_ipv6_scope_id(string, str_len, |
| 779 | delim, sin6) != 0) | ||
| 780 | return; | ||
| 779 | } | 781 | } |
| 780 | } | 782 | } |
| 781 | 783 | ||
| @@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
| 798 | * If there is a problem constructing the new sockaddr, set the address | 800 | * If there is a problem constructing the new sockaddr, set the address |
| 799 | * family to AF_UNSPEC. | 801 | * family to AF_UNSPEC. |
| 800 | */ | 802 | */ |
| 801 | static void nfs_parse_ip_address(char *string, size_t str_len, | 803 | void nfs_parse_ip_address(char *string, size_t str_len, |
| 802 | struct sockaddr *sap, size_t *addr_len) | 804 | struct sockaddr *sap, size_t *addr_len) |
| 803 | { | 805 | { |
| 804 | unsigned int i, colons; | 806 | unsigned int i, colons; |
| @@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1258 | &mnt->mount_server.addrlen); | 1260 | &mnt->mount_server.addrlen); |
| 1259 | kfree(string); | 1261 | kfree(string); |
| 1260 | break; | 1262 | break; |
| 1263 | case Opt_lookupcache: | ||
| 1264 | string = match_strdup(args); | ||
| 1265 | if (string == NULL) | ||
| 1266 | goto out_nomem; | ||
| 1267 | token = match_token(string, | ||
| 1268 | nfs_lookupcache_tokens, args); | ||
| 1269 | kfree(string); | ||
| 1270 | switch (token) { | ||
| 1271 | case Opt_lookupcache_all: | ||
| 1272 | mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); | ||
| 1273 | break; | ||
| 1274 | case Opt_lookupcache_positive: | ||
| 1275 | mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
| 1276 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; | ||
| 1277 | break; | ||
| 1278 | case Opt_lookupcache_none: | ||
| 1279 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
| 1280 | break; | ||
| 1281 | default: | ||
| 1282 | errors++; | ||
| 1283 | dfprintk(MOUNT, "NFS: invalid " | ||
| 1284 | "lookupcache argument\n"); | ||
| 1285 | }; | ||
| 1286 | break; | ||
| 1261 | 1287 | ||
| 1262 | /* | 1288 | /* |
| 1263 | * Special options | 1289 | * Special options |
| @@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options, | |||
| 1558 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | 1584 | * Translate to nfs_parsed_mount_data, which nfs_fill_super |
| 1559 | * can deal with. | 1585 | * can deal with. |
| 1560 | */ | 1586 | */ |
| 1561 | args->flags = data->flags; | 1587 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; |
| 1562 | args->rsize = data->rsize; | 1588 | args->rsize = data->rsize; |
| 1563 | args->wsize = data->wsize; | 1589 | args->wsize = data->wsize; |
| 1564 | args->timeo = data->timeo; | 1590 | args->timeo = data->timeo; |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index f089e5839d7d..ecc295347775 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
| @@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) | |||
| 99 | 99 | ||
| 100 | nfs_dec_sillycount(data->dir); | 100 | nfs_dec_sillycount(data->dir); |
| 101 | nfs_free_unlinkdata(data); | 101 | nfs_free_unlinkdata(data); |
| 102 | nfs_sb_deactive(NFS_SB(sb)); | 102 | nfs_sb_deactive(sb); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static const struct rpc_call_ops nfs_unlink_ops = { | 105 | static const struct rpc_call_ops nfs_unlink_ops = { |
| @@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
| 118 | .rpc_message = &msg, | 118 | .rpc_message = &msg, |
| 119 | .callback_ops = &nfs_unlink_ops, | 119 | .callback_ops = &nfs_unlink_ops, |
| 120 | .callback_data = data, | 120 | .callback_data = data, |
| 121 | .workqueue = nfsiod_workqueue, | ||
| 121 | .flags = RPC_TASK_ASYNC, | 122 | .flags = RPC_TASK_ASYNC, |
| 122 | }; | 123 | }; |
| 123 | struct rpc_task *task; | 124 | struct rpc_task *task; |
| @@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
| 149 | nfs_dec_sillycount(dir); | 150 | nfs_dec_sillycount(dir); |
| 150 | return 0; | 151 | return 0; |
| 151 | } | 152 | } |
| 152 | nfs_sb_active(NFS_SERVER(dir)); | 153 | nfs_sb_active(dir->i_sb); |
| 153 | data->args.fh = NFS_FH(dir); | 154 | data->args.fh = NFS_FH(dir); |
| 154 | nfs_fattr_init(&data->res.dir_attr); | 155 | nfs_fattr_init(&data->res.dir_attr); |
| 155 | 156 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3229e217c773..9f9845859fc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
| 1427 | .bdi = mapping->backing_dev_info, | 1427 | .bdi = mapping->backing_dev_info, |
| 1428 | .sync_mode = WB_SYNC_NONE, | 1428 | .sync_mode = WB_SYNC_NONE, |
| 1429 | .nr_to_write = LONG_MAX, | 1429 | .nr_to_write = LONG_MAX, |
| 1430 | .range_start = 0, | ||
| 1431 | .range_end = LLONG_MAX, | ||
| 1430 | .for_writepages = 1, | 1432 | .for_writepages = 1, |
| 1431 | .range_cyclic = 1, | ||
| 1432 | }; | 1433 | }; |
| 1433 | int ret; | 1434 | int ret; |
| 1434 | 1435 | ||
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 78a5922a2f11..ac8d0233b05c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -137,7 +137,7 @@ struct nfs_inode { | |||
| 137 | unsigned long attrtimeo_timestamp; | 137 | unsigned long attrtimeo_timestamp; |
| 138 | __u64 change_attr; /* v4 only */ | 138 | __u64 change_attr; /* v4 only */ |
| 139 | 139 | ||
| 140 | unsigned long last_updated; | 140 | unsigned long attr_gencount; |
| 141 | /* "Generation counter" for the attribute cache. This is | 141 | /* "Generation counter" for the attribute cache. This is |
| 142 | * bumped whenever we update the metadata on the | 142 | * bumped whenever we update the metadata on the |
| 143 | * server. | 143 | * server. |
| @@ -200,11 +200,10 @@ struct nfs_inode { | |||
| 200 | /* | 200 | /* |
| 201 | * Bit offsets in flags field | 201 | * Bit offsets in flags field |
| 202 | */ | 202 | */ |
| 203 | #define NFS_INO_REVALIDATING (0) /* revalidating attrs */ | 203 | #define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ |
| 204 | #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ | 204 | #define NFS_INO_STALE (1) /* possible stale inode */ |
| 205 | #define NFS_INO_STALE (2) /* possible stale inode */ | 205 | #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ |
| 206 | #define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ | 206 | #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ |
| 207 | #define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ | ||
| 208 | 207 | ||
| 209 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 208 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
| 210 | { | 209 | { |
| @@ -345,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct | |||
| 345 | extern void put_nfs_open_context(struct nfs_open_context *ctx); | 344 | extern void put_nfs_open_context(struct nfs_open_context *ctx); |
| 346 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); | 345 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); |
| 347 | extern u64 nfs_compat_user_ino64(u64 fileid); | 346 | extern u64 nfs_compat_user_ino64(u64 fileid); |
| 347 | extern void nfs_fattr_init(struct nfs_fattr *fattr); | ||
| 348 | 348 | ||
| 349 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ | 349 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ |
| 350 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ | 350 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ |
| 351 | 351 | extern unsigned long nfs_inc_attr_generation_counter(void); | |
| 352 | static inline void nfs_fattr_init(struct nfs_fattr *fattr) | ||
| 353 | { | ||
| 354 | fattr->valid = 0; | ||
| 355 | fattr->time_start = jiffies; | ||
| 356 | } | ||
| 357 | 352 | ||
| 358 | /* | 353 | /* |
| 359 | * linux/fs/nfs/file.c | 354 | * linux/fs/nfs/file.c |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c9beacd16c00..4e477ae58699 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
| @@ -119,7 +119,6 @@ struct nfs_server { | |||
| 119 | void (*destroy)(struct nfs_server *); | 119 | void (*destroy)(struct nfs_server *); |
| 120 | 120 | ||
| 121 | atomic_t active; /* Keep trace of any activity to this server */ | 121 | atomic_t active; /* Keep trace of any activity to this server */ |
| 122 | wait_queue_head_t active_wq; /* Wait for any activity to stop */ | ||
| 123 | 122 | ||
| 124 | /* mountd-related mount options */ | 123 | /* mountd-related mount options */ |
| 125 | struct sockaddr_storage mountd_address; | 124 | struct sockaddr_storage mountd_address; |
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index df7c6b7a7ebb..6549a06ac16e 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h | |||
| @@ -65,4 +65,8 @@ struct nfs_mount_data { | |||
| 65 | #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ | 65 | #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ |
| 66 | #define NFS_MOUNT_FLAGMASK 0xFFFF | 66 | #define NFS_MOUNT_FLAGMASK 0xFFFF |
| 67 | 67 | ||
| 68 | /* The following are for internal use only */ | ||
| 69 | #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 | ||
| 70 | #define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 | ||
| 71 | |||
| 68 | #endif | 72 | #endif |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8c77c11224d1..c1c31acb8a2b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -36,6 +36,7 @@ struct nfs_fattr { | |||
| 36 | __u32 nlink; | 36 | __u32 nlink; |
| 37 | __u32 uid; | 37 | __u32 uid; |
| 38 | __u32 gid; | 38 | __u32 gid; |
| 39 | dev_t rdev; | ||
| 39 | __u64 size; | 40 | __u64 size; |
| 40 | union { | 41 | union { |
| 41 | struct { | 42 | struct { |
| @@ -46,7 +47,6 @@ struct nfs_fattr { | |||
| 46 | __u64 used; | 47 | __u64 used; |
| 47 | } nfs3; | 48 | } nfs3; |
| 48 | } du; | 49 | } du; |
| 49 | dev_t rdev; | ||
| 50 | struct nfs_fsid fsid; | 50 | struct nfs_fsid fsid; |
| 51 | __u64 fileid; | 51 | __u64 fileid; |
| 52 | struct timespec atime; | 52 | struct timespec atime; |
| @@ -56,6 +56,7 @@ struct nfs_fattr { | |||
| 56 | __u64 change_attr; /* NFSv4 change attribute */ | 56 | __u64 change_attr; /* NFSv4 change attribute */ |
| 57 | __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ | 57 | __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ |
| 58 | unsigned long time_start; | 58 | unsigned long time_start; |
| 59 | unsigned long gencount; | ||
| 59 | }; | 60 | }; |
| 60 | 61 | ||
| 61 | #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ | 62 | #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ |
| @@ -672,16 +673,16 @@ struct nfs4_rename_res { | |||
| 672 | struct nfs_fattr * new_fattr; | 673 | struct nfs_fattr * new_fattr; |
| 673 | }; | 674 | }; |
| 674 | 675 | ||
| 675 | #define NFS4_SETCLIENTID_NAMELEN (56) | 676 | #define NFS4_SETCLIENTID_NAMELEN (127) |
| 676 | struct nfs4_setclientid { | 677 | struct nfs4_setclientid { |
| 677 | const nfs4_verifier * sc_verifier; | 678 | const nfs4_verifier * sc_verifier; |
| 678 | unsigned int sc_name_len; | 679 | unsigned int sc_name_len; |
| 679 | char sc_name[NFS4_SETCLIENTID_NAMELEN]; | 680 | char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; |
| 680 | u32 sc_prog; | 681 | u32 sc_prog; |
| 681 | unsigned int sc_netid_len; | 682 | unsigned int sc_netid_len; |
| 682 | char sc_netid[RPCBIND_MAXNETIDLEN]; | 683 | char sc_netid[RPCBIND_MAXNETIDLEN + 1]; |
| 683 | unsigned int sc_uaddr_len; | 684 | unsigned int sc_uaddr_len; |
| 684 | char sc_uaddr[RPCBIND_MAXUADDRLEN]; | 685 | char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; |
| 685 | u32 sc_cb_ident; | 686 | u32 sc_cb_ident; |
| 686 | }; | 687 | }; |
| 687 | 688 | ||
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 4de56b1d372b..54a379c9e8eb 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
| @@ -66,9 +66,6 @@ | |||
| 66 | 66 | ||
| 67 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ | 67 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ |
| 68 | 68 | ||
| 69 | #define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ | ||
| 70 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
| 71 | |||
| 72 | /* memory registration strategies */ | 69 | /* memory registration strategies */ |
| 73 | #define RPCRDMA_PERSISTENT_REGISTRATION (1) | 70 | #define RPCRDMA_PERSISTENT_REGISTRATION (1) |
| 74 | 71 | ||
| @@ -78,6 +75,7 @@ enum rpcrdma_memreg { | |||
| 78 | RPCRDMA_MEMWINDOWS, | 75 | RPCRDMA_MEMWINDOWS, |
| 79 | RPCRDMA_MEMWINDOWS_ASYNC, | 76 | RPCRDMA_MEMWINDOWS_ASYNC, |
| 80 | RPCRDMA_MTHCAFMR, | 77 | RPCRDMA_MTHCAFMR, |
| 78 | RPCRDMA_FRMR, | ||
| 81 | RPCRDMA_ALLPHYSICAL, | 79 | RPCRDMA_ALLPHYSICAL, |
| 82 | RPCRDMA_LAST | 80 | RPCRDMA_LAST |
| 83 | }; | 81 | }; |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index da0789fa1b88..4895c341e46d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
| @@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru | |||
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | /* save the nodename */ | 215 | /* save the nodename */ |
| 216 | clnt->cl_nodelen = strlen(utsname()->nodename); | 216 | clnt->cl_nodelen = strlen(init_utsname()->nodename); |
| 217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) | 217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) |
| 218 | clnt->cl_nodelen = UNX_MAXNODENAME; | 218 | clnt->cl_nodelen = UNX_MAXNODENAME; |
| 219 | memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); | 219 | memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); |
| 220 | rpc_register_client(clnt); | 220 | rpc_register_client(clnt); |
| 221 | return clnt; | 221 | return clnt; |
| 222 | 222 | ||
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 34abc91058d8..41013dd66ac3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
| @@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
| 460 | return rpc_run_task(&task_setup_data); | 460 | return rpc_run_task(&task_setup_data); |
| 461 | } | 461 | } |
| 462 | 462 | ||
| 463 | /* | ||
| 464 | * In the case where rpc clients have been cloned, we want to make | ||
| 465 | * sure that we use the program number/version etc of the actual | ||
| 466 | * owner of the xprt. To do so, we walk back up the tree of parents | ||
| 467 | * to find whoever created the transport and/or whoever has the | ||
| 468 | * autobind flag set. | ||
| 469 | */ | ||
| 470 | static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) | ||
| 471 | { | ||
| 472 | struct rpc_clnt *parent = clnt->cl_parent; | ||
| 473 | |||
| 474 | while (parent != clnt) { | ||
| 475 | if (parent->cl_xprt != clnt->cl_xprt) | ||
| 476 | break; | ||
| 477 | if (clnt->cl_autobind) | ||
| 478 | break; | ||
| 479 | clnt = parent; | ||
| 480 | parent = parent->cl_parent; | ||
| 481 | } | ||
| 482 | return clnt; | ||
| 483 | } | ||
| 484 | |||
| 463 | /** | 485 | /** |
| 464 | * rpcb_getport_async - obtain the port for a given RPC service on a given host | 486 | * rpcb_getport_async - obtain the port for a given RPC service on a given host |
| 465 | * @task: task that is waiting for portmapper request | 487 | * @task: task that is waiting for portmapper request |
| @@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
| 469 | */ | 491 | */ |
| 470 | void rpcb_getport_async(struct rpc_task *task) | 492 | void rpcb_getport_async(struct rpc_task *task) |
| 471 | { | 493 | { |
| 472 | struct rpc_clnt *clnt = task->tk_client; | 494 | struct rpc_clnt *clnt; |
| 473 | struct rpc_procinfo *proc; | 495 | struct rpc_procinfo *proc; |
| 474 | u32 bind_version; | 496 | u32 bind_version; |
| 475 | struct rpc_xprt *xprt = task->tk_xprt; | 497 | struct rpc_xprt *xprt; |
| 476 | struct rpc_clnt *rpcb_clnt; | 498 | struct rpc_clnt *rpcb_clnt; |
| 477 | static struct rpcbind_args *map; | 499 | static struct rpcbind_args *map; |
| 478 | struct rpc_task *child; | 500 | struct rpc_task *child; |
| @@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 481 | size_t salen; | 503 | size_t salen; |
| 482 | int status; | 504 | int status; |
| 483 | 505 | ||
| 506 | clnt = rpcb_find_transport_owner(task->tk_client); | ||
| 507 | xprt = clnt->cl_xprt; | ||
| 508 | |||
| 484 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", | 509 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", |
| 485 | task->tk_pid, __func__, | 510 | task->tk_pid, __func__, |
| 486 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); | 511 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); |
| 487 | 512 | ||
| 488 | /* Autobind on cloned rpc clients is discouraged */ | ||
| 489 | BUG_ON(clnt->cl_parent != clnt); | ||
| 490 | |||
| 491 | /* Put self on the wait queue to ensure we get notified if | 513 | /* Put self on the wait queue to ensure we get notified if |
| 492 | * some other task is already attempting to bind the port */ | 514 | * some other task is already attempting to bind the port */ |
| 493 | rpc_sleep_on(&xprt->binding, task, NULL); | 515 | rpc_sleep_on(&xprt->binding, task, NULL); |
| @@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 549 | status = -ENOMEM; | 571 | status = -ENOMEM; |
| 550 | dprintk("RPC: %5u %s: no memory available\n", | 572 | dprintk("RPC: %5u %s: no memory available\n", |
| 551 | task->tk_pid, __func__); | 573 | task->tk_pid, __func__); |
| 552 | goto bailout_nofree; | 574 | goto bailout_release_client; |
| 553 | } | 575 | } |
| 554 | map->r_prog = clnt->cl_prog; | 576 | map->r_prog = clnt->cl_prog; |
| 555 | map->r_vers = clnt->cl_vers; | 577 | map->r_vers = clnt->cl_vers; |
| @@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 569 | task->tk_pid, __func__); | 591 | task->tk_pid, __func__); |
| 570 | return; | 592 | return; |
| 571 | } | 593 | } |
| 572 | rpc_put_task(child); | ||
| 573 | 594 | ||
| 574 | task->tk_xprt->stat.bind_count++; | 595 | xprt->stat.bind_count++; |
| 596 | rpc_put_task(child); | ||
| 575 | return; | 597 | return; |
| 576 | 598 | ||
| 599 | bailout_release_client: | ||
| 600 | rpc_release_client(rpcb_clnt); | ||
| 577 | bailout_nofree: | 601 | bailout_nofree: |
| 578 | rpcb_wake_rpcbind_waiters(xprt, status); | 602 | rpcb_wake_rpcbind_waiters(xprt, status); |
| 579 | task->tk_status = status; | 603 | task->tk_status = status; |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe332..29e401bb612e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
| @@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) | |||
| 108 | goto out; | 108 | goto out; |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | result = -EINVAL; | 111 | list_add_tail(&transport->list, &xprt_list); |
| 112 | if (try_module_get(THIS_MODULE)) { | 112 | printk(KERN_INFO "RPC: Registered %s transport module.\n", |
| 113 | list_add_tail(&transport->list, &xprt_list); | 113 | transport->name); |
| 114 | printk(KERN_INFO "RPC: Registered %s transport module.\n", | 114 | result = 0; |
| 115 | transport->name); | ||
| 116 | result = 0; | ||
| 117 | } | ||
| 118 | 115 | ||
| 119 | out: | 116 | out: |
| 120 | spin_unlock(&xprt_list_lock); | 117 | spin_unlock(&xprt_list_lock); |
| @@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) | |||
| 143 | "RPC: Unregistered %s transport module.\n", | 140 | "RPC: Unregistered %s transport module.\n", |
| 144 | transport->name); | 141 | transport->name); |
| 145 | list_del_init(&transport->list); | 142 | list_del_init(&transport->list); |
| 146 | module_put(THIS_MODULE); | ||
| 147 | goto out; | 143 | goto out; |
| 148 | } | 144 | } |
| 149 | } | 145 | } |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5c1954d28d09..14106d26bb95 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
| @@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | if (xdrbuf->tail[0].iov_len) { | 120 | if (xdrbuf->tail[0].iov_len) { |
| 121 | /* the rpcrdma protocol allows us to omit any trailing | ||
| 122 | * xdr pad bytes, saving the server an RDMA operation. */ | ||
| 123 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | ||
| 124 | return n; | ||
| 121 | if (n == nsegs) | 125 | if (n == nsegs) |
| 122 | return 0; | 126 | return 0; |
| 123 | seg[n].mr_page = NULL; | 127 | seg[n].mr_page = NULL; |
| @@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 508 | if (hdrlen == 0) | 512 | if (hdrlen == 0) |
| 509 | return -1; | 513 | return -1; |
| 510 | 514 | ||
| 511 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" | 515 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
| 512 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 516 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
| 513 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, | 517 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
| 514 | headerp, base, req->rl_iov.lkey); | 518 | headerp, base, req->rl_iov.lkey); |
| 515 | 519 | ||
| @@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b | |||
| 594 | * Scatter inline received data back into provided iov's. | 598 | * Scatter inline received data back into provided iov's. |
| 595 | */ | 599 | */ |
| 596 | static void | 600 | static void |
| 597 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | 601 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) |
| 598 | { | 602 | { |
| 599 | int i, npages, curlen, olen; | 603 | int i, npages, curlen, olen; |
| 600 | char *destp; | 604 | char *destp; |
| @@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | |||
| 660 | } else | 664 | } else |
| 661 | rqst->rq_rcv_buf.tail[0].iov_len = 0; | 665 | rqst->rq_rcv_buf.tail[0].iov_len = 0; |
| 662 | 666 | ||
| 667 | if (pad) { | ||
| 668 | /* implicit padding on terminal chunk */ | ||
| 669 | unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; | ||
| 670 | while (pad--) | ||
| 671 | p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; | ||
| 672 | } | ||
| 673 | |||
| 663 | if (copy_len) | 674 | if (copy_len) |
| 664 | dprintk("RPC: %s: %d bytes in" | 675 | dprintk("RPC: %s: %d bytes in" |
| 665 | " %d extra segments (%d lost)\n", | 676 | " %d extra segments (%d lost)\n", |
| @@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
| 681 | struct rpc_xprt *xprt = ep->rep_xprt; | 692 | struct rpc_xprt *xprt = ep->rep_xprt; |
| 682 | 693 | ||
| 683 | spin_lock_bh(&xprt->transport_lock); | 694 | spin_lock_bh(&xprt->transport_lock); |
| 695 | if (++xprt->connect_cookie == 0) /* maintain a reserved value */ | ||
| 696 | ++xprt->connect_cookie; | ||
| 684 | if (ep->rep_connected > 0) { | 697 | if (ep->rep_connected > 0) { |
| 685 | if (!xprt_test_and_set_connected(xprt)) | 698 | if (!xprt_test_and_set_connected(xprt)) |
| 686 | xprt_wake_pending_tasks(xprt, 0); | 699 | xprt_wake_pending_tasks(xprt, 0); |
| 687 | } else { | 700 | } else { |
| 688 | if (xprt_test_and_clear_connected(xprt)) | 701 | if (xprt_test_and_clear_connected(xprt)) |
| 689 | xprt_wake_pending_tasks(xprt, ep->rep_connected); | 702 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
| 690 | } | 703 | } |
| 691 | spin_unlock_bh(&xprt->transport_lock); | 704 | spin_unlock_bh(&xprt->transport_lock); |
| 692 | } | 705 | } |
| @@ -792,14 +805,20 @@ repost: | |||
| 792 | ((unsigned char *)iptr - (unsigned char *)headerp); | 805 | ((unsigned char *)iptr - (unsigned char *)headerp); |
| 793 | status = rep->rr_len + rdmalen; | 806 | status = rep->rr_len + rdmalen; |
| 794 | r_xprt->rx_stats.total_rdma_reply += rdmalen; | 807 | r_xprt->rx_stats.total_rdma_reply += rdmalen; |
| 808 | /* special case - last chunk may omit padding */ | ||
| 809 | if (rdmalen &= 3) { | ||
| 810 | rdmalen = 4 - rdmalen; | ||
| 811 | status += rdmalen; | ||
| 812 | } | ||
| 795 | } else { | 813 | } else { |
| 796 | /* else ordinary inline */ | 814 | /* else ordinary inline */ |
| 815 | rdmalen = 0; | ||
| 797 | iptr = (__be32 *)((unsigned char *)headerp + 28); | 816 | iptr = (__be32 *)((unsigned char *)headerp + 28); |
| 798 | rep->rr_len -= 28; /*sizeof *headerp;*/ | 817 | rep->rr_len -= 28; /*sizeof *headerp;*/ |
| 799 | status = rep->rr_len; | 818 | status = rep->rr_len; |
| 800 | } | 819 | } |
| 801 | /* Fix up the rpc results for upper layer */ | 820 | /* Fix up the rpc results for upper layer */ |
| 802 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); | 821 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); |
| 803 | break; | 822 | break; |
| 804 | 823 | ||
| 805 | case htonl(RDMA_NOMSG): | 824 | case htonl(RDMA_NOMSG): |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |||
| 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
| 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
| 72 | static unsigned int xprt_rdma_inline_write_padding; | 72 | static unsigned int xprt_rdma_inline_write_padding; |
| 73 | #if !RPCRDMA_PERSISTENT_REGISTRATION | 73 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
| 74 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ | 74 | int xprt_rdma_pad_optimize = 0; |
| 75 | #else | ||
| 76 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; | ||
| 77 | #endif | ||
| 78 | 75 | ||
| 79 | #ifdef RPC_DEBUG | 76 | #ifdef RPC_DEBUG |
| 80 | 77 | ||
| @@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { | |||
| 140 | .extra2 = &max_memreg, | 137 | .extra2 = &max_memreg, |
| 141 | }, | 138 | }, |
| 142 | { | 139 | { |
| 140 | .ctl_name = CTL_UNNUMBERED, | ||
| 141 | .procname = "rdma_pad_optimize", | ||
| 142 | .data = &xprt_rdma_pad_optimize, | ||
| 143 | .maxlen = sizeof(unsigned int), | ||
| 144 | .mode = 0644, | ||
| 145 | .proc_handler = &proc_dointvec, | ||
| 146 | }, | ||
| 147 | { | ||
| 143 | .ctl_name = 0, | 148 | .ctl_name = 0, |
| 144 | }, | 149 | }, |
| 145 | }; | 150 | }; |
| @@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
| 458 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 463 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 459 | 464 | ||
| 460 | dprintk("RPC: %s: closing\n", __func__); | 465 | dprintk("RPC: %s: closing\n", __func__); |
| 466 | if (r_xprt->rx_ep.rep_connected > 0) | ||
| 467 | xprt->reestablish_timeout = 0; | ||
| 461 | xprt_disconnect_done(xprt); | 468 | xprt_disconnect_done(xprt); |
| 462 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 469 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
| 463 | } | 470 | } |
| @@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) | |||
| 485 | /* Reconnect */ | 492 | /* Reconnect */ |
| 486 | schedule_delayed_work(&r_xprt->rdma_connect, | 493 | schedule_delayed_work(&r_xprt->rdma_connect, |
| 487 | xprt->reestablish_timeout); | 494 | xprt->reestablish_timeout); |
| 495 | xprt->reestablish_timeout <<= 1; | ||
| 496 | if (xprt->reestablish_timeout > (30 * HZ)) | ||
| 497 | xprt->reestablish_timeout = (30 * HZ); | ||
| 498 | else if (xprt->reestablish_timeout < (5 * HZ)) | ||
| 499 | xprt->reestablish_timeout = (5 * HZ); | ||
| 488 | } else { | 500 | } else { |
| 489 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 501 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
| 490 | if (!RPC_IS_ASYNC(task)) | 502 | if (!RPC_IS_ASYNC(task)) |
| @@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 591 | } | 603 | } |
| 592 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 604 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
| 593 | out: | 605 | out: |
| 606 | req->rl_connect_cookie = 0; /* our reserved value */ | ||
| 594 | return req->rl_xdr_buf; | 607 | return req->rl_xdr_buf; |
| 595 | 608 | ||
| 596 | outfail: | 609 | outfail: |
| @@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 694 | req->rl_reply->rr_xprt = xprt; | 707 | req->rl_reply->rr_xprt = xprt; |
| 695 | } | 708 | } |
| 696 | 709 | ||
| 697 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { | 710 | /* Must suppress retransmit to maintain credits */ |
| 698 | xprt_disconnect_done(xprt); | 711 | if (req->rl_connect_cookie == xprt->connect_cookie) |
| 699 | return -ENOTCONN; /* implies disconnect */ | 712 | goto drop_connection; |
| 700 | } | 713 | req->rl_connect_cookie = xprt->connect_cookie; |
| 714 | |||
| 715 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
| 716 | goto drop_connection; | ||
| 701 | 717 | ||
| 718 | task->tk_bytes_sent += rqst->rq_snd_buf.len; | ||
| 702 | rqst->rq_bytes_sent = 0; | 719 | rqst->rq_bytes_sent = 0; |
| 703 | return 0; | 720 | return 0; |
| 721 | |||
| 722 | drop_connection: | ||
| 723 | xprt_disconnect_done(xprt); | ||
| 724 | return -ENOTCONN; /* implies disconnect */ | ||
| 704 | } | 725 | } |
| 705 | 726 | ||
| 706 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 727 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
| @@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) | |||
| 770 | { | 791 | { |
| 771 | int rc; | 792 | int rc; |
| 772 | 793 | ||
| 773 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | 794 | dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
| 774 | #ifdef RPC_DEBUG | 795 | #ifdef RPC_DEBUG |
| 775 | if (sunrpc_table_header) { | 796 | if (sunrpc_table_header) { |
| 776 | unregister_sysctl_table(sunrpc_table_header); | 797 | unregister_sysctl_table(sunrpc_table_header); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
| @@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
| 284 | switch (event->event) { | 284 | switch (event->event) { |
| 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
| 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
| 287 | ia->ri_async_rc = 0; | ||
| 287 | complete(&ia->ri_done); | 288 | complete(&ia->ri_done); |
| 288 | break; | 289 | break; |
| 289 | case RDMA_CM_EVENT_ADDR_ERROR: | 290 | case RDMA_CM_EVENT_ADDR_ERROR: |
| @@ -338,13 +339,32 @@ connected: | |||
| 338 | wake_up_all(&ep->rep_connect_wait); | 339 | wake_up_all(&ep->rep_connect_wait); |
| 339 | break; | 340 | break; |
| 340 | default: | 341 | default: |
| 341 | ia->ri_async_rc = -EINVAL; | 342 | dprintk("RPC: %s: unexpected CM event %d\n", |
| 342 | dprintk("RPC: %s: unexpected CM event %X\n", | ||
| 343 | __func__, event->event); | 343 | __func__, event->event); |
| 344 | complete(&ia->ri_done); | ||
| 345 | break; | 344 | break; |
| 346 | } | 345 | } |
| 347 | 346 | ||
| 347 | #ifdef RPC_DEBUG | ||
| 348 | if (connstate == 1) { | ||
| 349 | int ird = attr.max_dest_rd_atomic; | ||
| 350 | int tird = ep->rep_remote_cma.responder_resources; | ||
| 351 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
| 352 | "on %s, memreg %d slots %d ird %d%s\n", | ||
| 353 | NIPQUAD(addr->sin_addr.s_addr), | ||
| 354 | ntohs(addr->sin_port), | ||
| 355 | ia->ri_id->device->name, | ||
| 356 | ia->ri_memreg_strategy, | ||
| 357 | xprt->rx_buf.rb_max_requests, | ||
| 358 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | ||
| 359 | } else if (connstate < 0) { | ||
| 360 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
| 361 | "closed (%d)\n", | ||
| 362 | NIPQUAD(addr->sin_addr.s_addr), | ||
| 363 | ntohs(addr->sin_port), | ||
| 364 | connstate); | ||
| 365 | } | ||
| 366 | #endif | ||
| 367 | |||
| 348 | return 0; | 368 | return 0; |
| 349 | } | 369 | } |
| 350 | 370 | ||
| @@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
| 355 | struct rdma_cm_id *id; | 375 | struct rdma_cm_id *id; |
| 356 | int rc; | 376 | int rc; |
| 357 | 377 | ||
| 378 | init_completion(&ia->ri_done); | ||
| 379 | |||
| 358 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); | 380 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); |
| 359 | if (IS_ERR(id)) { | 381 | if (IS_ERR(id)) { |
| 360 | rc = PTR_ERR(id); | 382 | rc = PTR_ERR(id); |
| @@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
| 363 | return id; | 385 | return id; |
| 364 | } | 386 | } |
| 365 | 387 | ||
| 366 | ia->ri_async_rc = 0; | 388 | ia->ri_async_rc = -ETIMEDOUT; |
| 367 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 389 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); |
| 368 | if (rc) { | 390 | if (rc) { |
| 369 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 391 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
| 370 | __func__, rc); | 392 | __func__, rc); |
| 371 | goto out; | 393 | goto out; |
| 372 | } | 394 | } |
| 373 | wait_for_completion(&ia->ri_done); | 395 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
| 396 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
| 374 | rc = ia->ri_async_rc; | 397 | rc = ia->ri_async_rc; |
| 375 | if (rc) | 398 | if (rc) |
| 376 | goto out; | 399 | goto out; |
| 377 | 400 | ||
| 378 | ia->ri_async_rc = 0; | 401 | ia->ri_async_rc = -ETIMEDOUT; |
| 379 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 402 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
| 380 | if (rc) { | 403 | if (rc) { |
| 381 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | 404 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
| 382 | __func__, rc); | 405 | __func__, rc); |
| 383 | goto out; | 406 | goto out; |
| 384 | } | 407 | } |
| 385 | wait_for_completion(&ia->ri_done); | 408 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
| 409 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
| 386 | rc = ia->ri_async_rc; | 410 | rc = ia->ri_async_rc; |
| 387 | if (rc) | 411 | if (rc) |
| 388 | goto out; | 412 | goto out; |
| @@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) | |||
| 423 | int | 447 | int |
| 424 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 448 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
| 425 | { | 449 | { |
| 426 | int rc; | 450 | int rc, mem_priv; |
| 451 | struct ib_device_attr devattr; | ||
| 427 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 452 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
| 428 | 453 | ||
| 429 | init_completion(&ia->ri_done); | ||
| 430 | |||
| 431 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 454 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
| 432 | if (IS_ERR(ia->ri_id)) { | 455 | if (IS_ERR(ia->ri_id)) { |
| 433 | rc = PTR_ERR(ia->ri_id); | 456 | rc = PTR_ERR(ia->ri_id); |
| @@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 443 | } | 466 | } |
| 444 | 467 | ||
| 445 | /* | 468 | /* |
| 469 | * Query the device to determine if the requested memory | ||
| 470 | * registration strategy is supported. If it isn't, set the | ||
| 471 | * strategy to a globally supported model. | ||
| 472 | */ | ||
| 473 | rc = ib_query_device(ia->ri_id->device, &devattr); | ||
| 474 | if (rc) { | ||
| 475 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
| 476 | __func__, rc); | ||
| 477 | goto out2; | ||
| 478 | } | ||
| 479 | |||
| 480 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | ||
| 481 | ia->ri_have_dma_lkey = 1; | ||
| 482 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | ||
| 483 | } | ||
| 484 | |||
| 485 | switch (memreg) { | ||
| 486 | case RPCRDMA_MEMWINDOWS: | ||
| 487 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 488 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
| 489 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
| 490 | "specified but not supported by adapter, " | ||
| 491 | "using slower RPCRDMA_REGISTER\n", | ||
| 492 | __func__); | ||
| 493 | memreg = RPCRDMA_REGISTER; | ||
| 494 | } | ||
| 495 | break; | ||
| 496 | case RPCRDMA_MTHCAFMR: | ||
| 497 | if (!ia->ri_id->device->alloc_fmr) { | ||
| 498 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 499 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 500 | "specified but not supported by adapter, " | ||
| 501 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
| 502 | __func__); | ||
| 503 | memreg = RPCRDMA_ALLPHYSICAL; | ||
| 504 | #else | ||
| 505 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 506 | "specified but not supported by adapter, " | ||
| 507 | "using slower RPCRDMA_REGISTER\n", | ||
| 508 | __func__); | ||
| 509 | memreg = RPCRDMA_REGISTER; | ||
| 510 | #endif | ||
| 511 | } | ||
| 512 | break; | ||
| 513 | case RPCRDMA_FRMR: | ||
| 514 | /* Requires both frmr reg and local dma lkey */ | ||
| 515 | if ((devattr.device_cap_flags & | ||
| 516 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
| 517 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
| 518 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 519 | dprintk("RPC: %s: FRMR registration " | ||
| 520 | "specified but not supported by adapter, " | ||
| 521 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
| 522 | __func__); | ||
| 523 | memreg = RPCRDMA_ALLPHYSICAL; | ||
| 524 | #else | ||
| 525 | dprintk("RPC: %s: FRMR registration " | ||
| 526 | "specified but not supported by adapter, " | ||
| 527 | "using slower RPCRDMA_REGISTER\n", | ||
| 528 | __func__); | ||
| 529 | memreg = RPCRDMA_REGISTER; | ||
| 530 | #endif | ||
| 531 | } | ||
| 532 | break; | ||
| 533 | } | ||
| 534 | |||
| 535 | /* | ||
| 446 | * Optionally obtain an underlying physical identity mapping in | 536 | * Optionally obtain an underlying physical identity mapping in |
| 447 | * order to do a memory window-based bind. This base registration | 537 | * order to do a memory window-based bind. This base registration |
| 448 | * is protected from remote access - that is enabled only by binding | 538 | * is protected from remote access - that is enabled only by binding |
| @@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 450 | * revoked after the corresponding completion similar to a storage | 540 | * revoked after the corresponding completion similar to a storage |
| 451 | * adapter. | 541 | * adapter. |
| 452 | */ | 542 | */ |
| 453 | if (memreg > RPCRDMA_REGISTER) { | 543 | switch (memreg) { |
| 454 | int mem_priv = IB_ACCESS_LOCAL_WRITE; | 544 | case RPCRDMA_BOUNCEBUFFERS: |
| 455 | switch (memreg) { | 545 | case RPCRDMA_REGISTER: |
| 546 | case RPCRDMA_FRMR: | ||
| 547 | break; | ||
| 456 | #if RPCRDMA_PERSISTENT_REGISTRATION | 548 | #if RPCRDMA_PERSISTENT_REGISTRATION |
| 457 | case RPCRDMA_ALLPHYSICAL: | 549 | case RPCRDMA_ALLPHYSICAL: |
| 458 | mem_priv |= IB_ACCESS_REMOTE_WRITE; | 550 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
| 459 | mem_priv |= IB_ACCESS_REMOTE_READ; | 551 | IB_ACCESS_REMOTE_WRITE | |
| 460 | break; | 552 | IB_ACCESS_REMOTE_READ; |
| 553 | goto register_setup; | ||
| 461 | #endif | 554 | #endif |
| 462 | case RPCRDMA_MEMWINDOWS_ASYNC: | 555 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 463 | case RPCRDMA_MEMWINDOWS: | 556 | case RPCRDMA_MEMWINDOWS: |
| 464 | mem_priv |= IB_ACCESS_MW_BIND; | 557 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
| 465 | break; | 558 | IB_ACCESS_MW_BIND; |
| 466 | default: | 559 | goto register_setup; |
| 560 | case RPCRDMA_MTHCAFMR: | ||
| 561 | if (ia->ri_have_dma_lkey) | ||
| 467 | break; | 562 | break; |
| 468 | } | 563 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
| 564 | register_setup: | ||
| 469 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 565 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
| 470 | if (IS_ERR(ia->ri_bind_mem)) { | 566 | if (IS_ERR(ia->ri_bind_mem)) { |
| 471 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 567 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
| @@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 475 | memreg = RPCRDMA_REGISTER; | 571 | memreg = RPCRDMA_REGISTER; |
| 476 | ia->ri_bind_mem = NULL; | 572 | ia->ri_bind_mem = NULL; |
| 477 | } | 573 | } |
| 574 | break; | ||
| 575 | default: | ||
| 576 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | ||
| 577 | __func__, memreg); | ||
| 578 | rc = -EINVAL; | ||
| 579 | goto out2; | ||
| 478 | } | 580 | } |
| 581 | dprintk("RPC: %s: memory registration strategy is %d\n", | ||
| 582 | __func__, memreg); | ||
| 479 | 583 | ||
| 480 | /* Else will do memory reg/dereg for each chunk */ | 584 | /* Else will do memory reg/dereg for each chunk */ |
| 481 | ia->ri_memreg_strategy = memreg; | 585 | ia->ri_memreg_strategy = memreg; |
| @@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 483 | return 0; | 587 | return 0; |
| 484 | out2: | 588 | out2: |
| 485 | rdma_destroy_id(ia->ri_id); | 589 | rdma_destroy_id(ia->ri_id); |
| 590 | ia->ri_id = NULL; | ||
| 486 | out1: | 591 | out1: |
| 487 | return rc; | 592 | return rc; |
| 488 | } | 593 | } |
| @@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
| 503 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 608 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
| 504 | __func__, rc); | 609 | __func__, rc); |
| 505 | } | 610 | } |
| 506 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) | 611 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
| 507 | rdma_destroy_qp(ia->ri_id); | 612 | if (ia->ri_id->qp) |
| 613 | rdma_destroy_qp(ia->ri_id); | ||
| 614 | rdma_destroy_id(ia->ri_id); | ||
| 615 | ia->ri_id = NULL; | ||
| 616 | } | ||
| 508 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 617 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { |
| 509 | rc = ib_dealloc_pd(ia->ri_pd); | 618 | rc = ib_dealloc_pd(ia->ri_pd); |
| 510 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 619 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", |
| 511 | __func__, rc); | 620 | __func__, rc); |
| 512 | } | 621 | } |
| 513 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) | ||
| 514 | rdma_destroy_id(ia->ri_id); | ||
| 515 | } | 622 | } |
| 516 | 623 | ||
| 517 | /* | 624 | /* |
| @@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 541 | ep->rep_attr.srq = NULL; | 648 | ep->rep_attr.srq = NULL; |
| 542 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 649 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
| 543 | switch (ia->ri_memreg_strategy) { | 650 | switch (ia->ri_memreg_strategy) { |
| 651 | case RPCRDMA_FRMR: | ||
| 652 | /* Add room for frmr register and invalidate WRs */ | ||
| 653 | ep->rep_attr.cap.max_send_wr *= 3; | ||
| 654 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
| 655 | return -EINVAL; | ||
| 656 | break; | ||
| 544 | case RPCRDMA_MEMWINDOWS_ASYNC: | 657 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 545 | case RPCRDMA_MEMWINDOWS: | 658 | case RPCRDMA_MEMWINDOWS: |
| 546 | /* Add room for mw_binds+unbinds - overkill! */ | 659 | /* Add room for mw_binds+unbinds - overkill! */ |
| @@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 617 | ep->rep_remote_cma.private_data_len = 0; | 730 | ep->rep_remote_cma.private_data_len = 0; |
| 618 | 731 | ||
| 619 | /* Client offers RDMA Read but does not initiate */ | 732 | /* Client offers RDMA Read but does not initiate */ |
| 620 | switch (ia->ri_memreg_strategy) { | 733 | ep->rep_remote_cma.initiator_depth = 0; |
| 621 | case RPCRDMA_BOUNCEBUFFERS: | 734 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) |
| 622 | ep->rep_remote_cma.responder_resources = 0; | 735 | ep->rep_remote_cma.responder_resources = 0; |
| 623 | break; | 736 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
| 624 | case RPCRDMA_MTHCAFMR: | 737 | ep->rep_remote_cma.responder_resources = 32; |
| 625 | case RPCRDMA_REGISTER: | 738 | else |
| 626 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
| 627 | (RPCRDMA_MAX_DATA_SEGS / 8); | ||
| 628 | break; | ||
| 629 | case RPCRDMA_MEMWINDOWS: | ||
| 630 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 631 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 632 | case RPCRDMA_ALLPHYSICAL: | ||
| 633 | #endif | ||
| 634 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
| 635 | (RPCRDMA_MAX_DATA_SEGS / 2); | ||
| 636 | break; | ||
| 637 | default: | ||
| 638 | break; | ||
| 639 | } | ||
| 640 | if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) | ||
| 641 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 739 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
| 642 | ep->rep_remote_cma.initiator_depth = 0; | ||
| 643 | 740 | ||
| 644 | ep->rep_remote_cma.retry_count = 7; | 741 | ep->rep_remote_cma.retry_count = 7; |
| 645 | ep->rep_remote_cma.flow_control = 0; | 742 | ep->rep_remote_cma.flow_control = 0; |
| @@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 679 | if (rc) | 776 | if (rc) |
| 680 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 777 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
| 681 | " returned %i\n", __func__, rc); | 778 | " returned %i\n", __func__, rc); |
| 779 | rdma_destroy_qp(ia->ri_id); | ||
| 780 | ia->ri_id->qp = NULL; | ||
| 682 | } | 781 | } |
| 683 | 782 | ||
| 684 | ep->rep_func = NULL; | ||
| 685 | |||
| 686 | /* padding - could be done in rpcrdma_buffer_destroy... */ | 783 | /* padding - could be done in rpcrdma_buffer_destroy... */ |
| 687 | if (ep->rep_pad_mr) { | 784 | if (ep->rep_pad_mr) { |
| 688 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); | 785 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); |
| 689 | ep->rep_pad_mr = NULL; | 786 | ep->rep_pad_mr = NULL; |
| 690 | } | 787 | } |
| 691 | 788 | ||
| 692 | if (ia->ri_id->qp) { | ||
| 693 | rdma_destroy_qp(ia->ri_id); | ||
| 694 | ia->ri_id->qp = NULL; | ||
| 695 | } | ||
| 696 | |||
| 697 | rpcrdma_clean_cq(ep->rep_cq); | 789 | rpcrdma_clean_cq(ep->rep_cq); |
| 698 | rc = ib_destroy_cq(ep->rep_cq); | 790 | rc = ib_destroy_cq(ep->rep_cq); |
| 699 | if (rc) | 791 | if (rc) |
| @@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 712 | struct rdma_cm_id *id; | 804 | struct rdma_cm_id *id; |
| 713 | int rc = 0; | 805 | int rc = 0; |
| 714 | int retry_count = 0; | 806 | int retry_count = 0; |
| 715 | int reconnect = (ep->rep_connected != 0); | ||
| 716 | 807 | ||
| 717 | if (reconnect) { | 808 | if (ep->rep_connected != 0) { |
| 718 | struct rpcrdma_xprt *xprt; | 809 | struct rpcrdma_xprt *xprt; |
| 719 | retry: | 810 | retry: |
| 720 | rc = rpcrdma_ep_disconnect(ep, ia); | 811 | rc = rpcrdma_ep_disconnect(ep, ia); |
| @@ -745,6 +836,7 @@ retry: | |||
| 745 | goto out; | 836 | goto out; |
| 746 | } | 837 | } |
| 747 | /* END TEMP */ | 838 | /* END TEMP */ |
| 839 | rdma_destroy_qp(ia->ri_id); | ||
| 748 | rdma_destroy_id(ia->ri_id); | 840 | rdma_destroy_id(ia->ri_id); |
| 749 | ia->ri_id = id; | 841 | ia->ri_id = id; |
| 750 | } | 842 | } |
| @@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
| 769 | } | 861 | } |
| 770 | } | 862 | } |
| 771 | 863 | ||
| 772 | /* Theoretically a client initiator_depth > 0 is not needed, | ||
| 773 | * but many peers fail to complete the connection unless they | ||
| 774 | * == responder_resources! */ | ||
| 775 | if (ep->rep_remote_cma.initiator_depth != | ||
| 776 | ep->rep_remote_cma.responder_resources) | ||
| 777 | ep->rep_remote_cma.initiator_depth = | ||
| 778 | ep->rep_remote_cma.responder_resources; | ||
| 779 | |||
| 780 | ep->rep_connected = 0; | 864 | ep->rep_connected = 0; |
| 781 | 865 | ||
| 782 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 866 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
| @@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
| 786 | goto out; | 870 | goto out; |
| 787 | } | 871 | } |
| 788 | 872 | ||
| 789 | if (reconnect) | ||
| 790 | return 0; | ||
| 791 | |||
| 792 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 873 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
| 793 | 874 | ||
| 794 | /* | 875 | /* |
| @@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
| 805 | if (ep->rep_connected <= 0) { | 886 | if (ep->rep_connected <= 0) { |
| 806 | /* Sometimes, the only way to reliably connect to remote | 887 | /* Sometimes, the only way to reliably connect to remote |
| 807 | * CMs is to use same nonzero values for ORD and IRD. */ | 888 | * CMs is to use same nonzero values for ORD and IRD. */ |
| 808 | ep->rep_remote_cma.initiator_depth = | 889 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && |
| 809 | ep->rep_remote_cma.responder_resources; | 890 | (ep->rep_remote_cma.responder_resources == 0 || |
| 810 | if (ep->rep_remote_cma.initiator_depth == 0) | 891 | ep->rep_remote_cma.initiator_depth != |
| 811 | ++ep->rep_remote_cma.initiator_depth; | 892 | ep->rep_remote_cma.responder_resources)) { |
| 812 | if (ep->rep_remote_cma.responder_resources == 0) | 893 | if (ep->rep_remote_cma.responder_resources == 0) |
| 813 | ++ep->rep_remote_cma.responder_resources; | 894 | ep->rep_remote_cma.responder_resources = 1; |
| 814 | if (retry_count++ == 0) | 895 | ep->rep_remote_cma.initiator_depth = |
| 896 | ep->rep_remote_cma.responder_resources; | ||
| 815 | goto retry; | 897 | goto retry; |
| 898 | } | ||
| 816 | rc = ep->rep_connected; | 899 | rc = ep->rep_connected; |
| 817 | } else { | 900 | } else { |
| 818 | dprintk("RPC: %s: connected\n", __func__); | 901 | dprintk("RPC: %s: connected\n", __func__); |
| @@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 863 | char *p; | 946 | char *p; |
| 864 | size_t len; | 947 | size_t len; |
| 865 | int i, rc; | 948 | int i, rc; |
| 949 | struct rpcrdma_mw *r; | ||
| 866 | 950 | ||
| 867 | buf->rb_max_requests = cdata->max_requests; | 951 | buf->rb_max_requests = cdata->max_requests; |
| 868 | spin_lock_init(&buf->rb_lock); | 952 | spin_lock_init(&buf->rb_lock); |
| @@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 873 | * 2. arrays of struct rpcrdma_req to fill in pointers | 957 | * 2. arrays of struct rpcrdma_req to fill in pointers |
| 874 | * 3. array of struct rpcrdma_rep for replies | 958 | * 3. array of struct rpcrdma_rep for replies |
| 875 | * 4. padding, if any | 959 | * 4. padding, if any |
| 876 | * 5. mw's, if any | 960 | * 5. mw's, fmr's or frmr's, if any |
| 877 | * Send/recv buffers in req/rep need to be registered | 961 | * Send/recv buffers in req/rep need to be registered |
| 878 | */ | 962 | */ |
| 879 | 963 | ||
| @@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 881 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 965 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
| 882 | len += cdata->padding; | 966 | len += cdata->padding; |
| 883 | switch (ia->ri_memreg_strategy) { | 967 | switch (ia->ri_memreg_strategy) { |
| 968 | case RPCRDMA_FRMR: | ||
| 969 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
| 970 | sizeof(struct rpcrdma_mw); | ||
| 971 | break; | ||
| 884 | case RPCRDMA_MTHCAFMR: | 972 | case RPCRDMA_MTHCAFMR: |
| 885 | /* TBD we are perhaps overallocating here */ | 973 | /* TBD we are perhaps overallocating here */ |
| 886 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 974 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
| @@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 927 | * and also reduce unbind-to-bind collision. | 1015 | * and also reduce unbind-to-bind collision. |
| 928 | */ | 1016 | */ |
| 929 | INIT_LIST_HEAD(&buf->rb_mws); | 1017 | INIT_LIST_HEAD(&buf->rb_mws); |
| 1018 | r = (struct rpcrdma_mw *)p; | ||
| 930 | switch (ia->ri_memreg_strategy) { | 1019 | switch (ia->ri_memreg_strategy) { |
| 1020 | case RPCRDMA_FRMR: | ||
| 1021 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
| 1022 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
| 1023 | RPCRDMA_MAX_SEGS); | ||
| 1024 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
| 1025 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
| 1026 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
| 1027 | " failed %i\n", __func__, rc); | ||
| 1028 | goto out; | ||
| 1029 | } | ||
| 1030 | r->r.frmr.fr_pgl = | ||
| 1031 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
| 1032 | RPCRDMA_MAX_SEGS); | ||
| 1033 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
| 1034 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
| 1035 | dprintk("RPC: %s: " | ||
| 1036 | "ib_alloc_fast_reg_page_list " | ||
| 1037 | "failed %i\n", __func__, rc); | ||
| 1038 | goto out; | ||
| 1039 | } | ||
| 1040 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 1041 | ++r; | ||
| 1042 | } | ||
| 1043 | break; | ||
| 931 | case RPCRDMA_MTHCAFMR: | 1044 | case RPCRDMA_MTHCAFMR: |
| 932 | { | ||
| 933 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
| 934 | struct ib_fmr_attr fa = { | ||
| 935 | RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT | ||
| 936 | }; | ||
| 937 | /* TBD we are perhaps overallocating here */ | 1045 | /* TBD we are perhaps overallocating here */ |
| 938 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1046 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
| 1047 | static struct ib_fmr_attr fa = | ||
| 1048 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
| 939 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | 1049 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, |
| 940 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | 1050 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, |
| 941 | &fa); | 1051 | &fa); |
| @@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 948 | list_add(&r->mw_list, &buf->rb_mws); | 1058 | list_add(&r->mw_list, &buf->rb_mws); |
| 949 | ++r; | 1059 | ++r; |
| 950 | } | 1060 | } |
| 951 | } | ||
| 952 | break; | 1061 | break; |
| 953 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1062 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 954 | case RPCRDMA_MEMWINDOWS: | 1063 | case RPCRDMA_MEMWINDOWS: |
| 955 | { | ||
| 956 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
| 957 | /* Allocate one extra request's worth, for full cycling */ | 1064 | /* Allocate one extra request's worth, for full cycling */ |
| 958 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1065 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
| 959 | r->r.mw = ib_alloc_mw(ia->ri_pd); | 1066 | r->r.mw = ib_alloc_mw(ia->ri_pd); |
| @@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 966 | list_add(&r->mw_list, &buf->rb_mws); | 1073 | list_add(&r->mw_list, &buf->rb_mws); |
| 967 | ++r; | 1074 | ++r; |
| 968 | } | 1075 | } |
| 969 | } | ||
| 970 | break; | 1076 | break; |
| 971 | default: | 1077 | default: |
| 972 | break; | 1078 | break; |
| @@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1046 | { | 1152 | { |
| 1047 | int rc, i; | 1153 | int rc, i; |
| 1048 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1154 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
| 1155 | struct rpcrdma_mw *r; | ||
| 1049 | 1156 | ||
| 1050 | /* clean up in reverse order from create | 1157 | /* clean up in reverse order from create |
| 1051 | * 1. recv mr memory (mr free, then kfree) | 1158 | * 1. recv mr memory (mr free, then kfree) |
| @@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1065 | } | 1172 | } |
| 1066 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1173 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
| 1067 | while (!list_empty(&buf->rb_mws)) { | 1174 | while (!list_empty(&buf->rb_mws)) { |
| 1068 | struct rpcrdma_mw *r; | ||
| 1069 | r = list_entry(buf->rb_mws.next, | 1175 | r = list_entry(buf->rb_mws.next, |
| 1070 | struct rpcrdma_mw, mw_list); | 1176 | struct rpcrdma_mw, mw_list); |
| 1071 | list_del(&r->mw_list); | 1177 | list_del(&r->mw_list); |
| 1072 | switch (ia->ri_memreg_strategy) { | 1178 | switch (ia->ri_memreg_strategy) { |
| 1179 | case RPCRDMA_FRMR: | ||
| 1180 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1181 | if (rc) | ||
| 1182 | dprintk("RPC: %s:" | ||
| 1183 | " ib_dereg_mr" | ||
| 1184 | " failed %i\n", | ||
| 1185 | __func__, rc); | ||
| 1186 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1187 | break; | ||
| 1073 | case RPCRDMA_MTHCAFMR: | 1188 | case RPCRDMA_MTHCAFMR: |
| 1074 | rc = ib_dealloc_fmr(r->r.fmr); | 1189 | rc = ib_dealloc_fmr(r->r.fmr); |
| 1075 | if (rc) | 1190 | if (rc) |
| @@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
| 1115 | { | 1230 | { |
| 1116 | struct rpcrdma_req *req; | 1231 | struct rpcrdma_req *req; |
| 1117 | unsigned long flags; | 1232 | unsigned long flags; |
| 1233 | int i; | ||
| 1234 | struct rpcrdma_mw *r; | ||
| 1118 | 1235 | ||
| 1119 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1236 | spin_lock_irqsave(&buffers->rb_lock, flags); |
| 1120 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1237 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
| @@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
| 1135 | } | 1252 | } |
| 1136 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1253 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
| 1137 | if (!list_empty(&buffers->rb_mws)) { | 1254 | if (!list_empty(&buffers->rb_mws)) { |
| 1138 | int i = RPCRDMA_MAX_SEGS - 1; | 1255 | i = RPCRDMA_MAX_SEGS - 1; |
| 1139 | do { | 1256 | do { |
| 1140 | struct rpcrdma_mw *r; | ||
| 1141 | r = list_entry(buffers->rb_mws.next, | 1257 | r = list_entry(buffers->rb_mws.next, |
| 1142 | struct rpcrdma_mw, mw_list); | 1258 | struct rpcrdma_mw, mw_list); |
| 1143 | list_del(&r->mw_list); | 1259 | list_del(&r->mw_list); |
| @@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
| 1171 | req->rl_reply = NULL; | 1287 | req->rl_reply = NULL; |
| 1172 | } | 1288 | } |
| 1173 | switch (ia->ri_memreg_strategy) { | 1289 | switch (ia->ri_memreg_strategy) { |
| 1290 | case RPCRDMA_FRMR: | ||
| 1174 | case RPCRDMA_MTHCAFMR: | 1291 | case RPCRDMA_MTHCAFMR: |
| 1175 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1292 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 1176 | case RPCRDMA_MEMWINDOWS: | 1293 | case RPCRDMA_MEMWINDOWS: |
| @@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
| 1252 | va, len, DMA_BIDIRECTIONAL); | 1369 | va, len, DMA_BIDIRECTIONAL); |
| 1253 | iov->length = len; | 1370 | iov->length = len; |
| 1254 | 1371 | ||
| 1255 | if (ia->ri_bind_mem != NULL) { | 1372 | if (ia->ri_have_dma_lkey) { |
| 1373 | *mrp = NULL; | ||
| 1374 | iov->lkey = ia->ri_dma_lkey; | ||
| 1375 | return 0; | ||
| 1376 | } else if (ia->ri_bind_mem != NULL) { | ||
| 1256 | *mrp = NULL; | 1377 | *mrp = NULL; |
| 1257 | iov->lkey = ia->ri_bind_mem->lkey; | 1378 | iov->lkey = ia->ri_bind_mem->lkey; |
| 1258 | return 0; | 1379 | return 0; |
| @@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
| 1329 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | 1450 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); |
| 1330 | } | 1451 | } |
| 1331 | 1452 | ||
| 1453 | static int | ||
| 1454 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1455 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
| 1456 | struct rpcrdma_xprt *r_xprt) | ||
| 1457 | { | ||
| 1458 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1459 | struct ib_send_wr frmr_wr, *bad_wr; | ||
| 1460 | u8 key; | ||
| 1461 | int len, pageoff; | ||
| 1462 | int i, rc; | ||
| 1463 | |||
| 1464 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 1465 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 1466 | seg1->mr_len += pageoff; | ||
| 1467 | len = -pageoff; | ||
| 1468 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1469 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1470 | for (i = 0; i < *nsegs;) { | ||
| 1471 | rpcrdma_map_one(ia, seg, writing); | ||
| 1472 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | ||
| 1473 | len += seg->mr_len; | ||
| 1474 | ++seg; | ||
| 1475 | ++i; | ||
| 1476 | /* Check for holes */ | ||
| 1477 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1478 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 1479 | break; | ||
| 1480 | } | ||
| 1481 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
| 1482 | __func__, seg1->mr_chunk.rl_mw, i); | ||
| 1483 | |||
| 1484 | /* Bump the key */ | ||
| 1485 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
| 1486 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
| 1487 | |||
| 1488 | /* Prepare FRMR WR */ | ||
| 1489 | memset(&frmr_wr, 0, sizeof frmr_wr); | ||
| 1490 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | ||
| 1491 | frmr_wr.send_flags = 0; /* unsignaled */ | ||
| 1492 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | ||
| 1493 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | ||
| 1494 | frmr_wr.wr.fast_reg.page_list_len = i; | ||
| 1495 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
| 1496 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | ||
| 1497 | frmr_wr.wr.fast_reg.access_flags = (writing ? | ||
| 1498 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | ||
| 1499 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
| 1500 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1501 | |||
| 1502 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | ||
| 1503 | |||
| 1504 | if (rc) { | ||
| 1505 | dprintk("RPC: %s: failed ib_post_send for register," | ||
| 1506 | " status %i\n", __func__, rc); | ||
| 1507 | while (i--) | ||
| 1508 | rpcrdma_unmap_one(ia, --seg); | ||
| 1509 | } else { | ||
| 1510 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
| 1511 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 1512 | seg1->mr_nsegs = i; | ||
| 1513 | seg1->mr_len = len; | ||
| 1514 | } | ||
| 1515 | *nsegs = i; | ||
| 1516 | return rc; | ||
| 1517 | } | ||
| 1518 | |||
| 1519 | static int | ||
| 1520 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1521 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
| 1522 | { | ||
| 1523 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1524 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
| 1525 | int rc; | ||
| 1526 | |||
| 1527 | while (seg1->mr_nsegs--) | ||
| 1528 | rpcrdma_unmap_one(ia, seg++); | ||
| 1529 | |||
| 1530 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
| 1531 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
| 1532 | invalidate_wr.send_flags = 0; /* unsignaled */ | ||
| 1533 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
| 1534 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1535 | |||
| 1536 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
| 1537 | if (rc) | ||
| 1538 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
| 1539 | " status %i\n", __func__, rc); | ||
| 1540 | return rc; | ||
| 1541 | } | ||
| 1542 | |||
| 1543 | static int | ||
| 1544 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1545 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
| 1546 | { | ||
| 1547 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1548 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1549 | int len, pageoff, i, rc; | ||
| 1550 | |||
| 1551 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 1552 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 1553 | seg1->mr_len += pageoff; | ||
| 1554 | len = -pageoff; | ||
| 1555 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1556 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1557 | for (i = 0; i < *nsegs;) { | ||
| 1558 | rpcrdma_map_one(ia, seg, writing); | ||
| 1559 | physaddrs[i] = seg->mr_dma; | ||
| 1560 | len += seg->mr_len; | ||
| 1561 | ++seg; | ||
| 1562 | ++i; | ||
| 1563 | /* Check for holes */ | ||
| 1564 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1565 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 1566 | break; | ||
| 1567 | } | ||
| 1568 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
| 1569 | physaddrs, i, seg1->mr_dma); | ||
| 1570 | if (rc) { | ||
| 1571 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
| 1572 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
| 1573 | len, (unsigned long long)seg1->mr_dma, | ||
| 1574 | pageoff, i, rc); | ||
| 1575 | while (i--) | ||
| 1576 | rpcrdma_unmap_one(ia, --seg); | ||
| 1577 | } else { | ||
| 1578 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
| 1579 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 1580 | seg1->mr_nsegs = i; | ||
| 1581 | seg1->mr_len = len; | ||
| 1582 | } | ||
| 1583 | *nsegs = i; | ||
| 1584 | return rc; | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | static int | ||
| 1588 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1589 | struct rpcrdma_ia *ia) | ||
| 1590 | { | ||
| 1591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1592 | LIST_HEAD(l); | ||
| 1593 | int rc; | ||
| 1594 | |||
| 1595 | list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); | ||
| 1596 | rc = ib_unmap_fmr(&l); | ||
| 1597 | while (seg1->mr_nsegs--) | ||
| 1598 | rpcrdma_unmap_one(ia, seg++); | ||
| 1599 | if (rc) | ||
| 1600 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
| 1601 | " status %i\n", __func__, rc); | ||
| 1602 | return rc; | ||
| 1603 | } | ||
| 1604 | |||
| 1605 | static int | ||
| 1606 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1607 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
| 1608 | struct rpcrdma_xprt *r_xprt) | ||
| 1609 | { | ||
| 1610 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1611 | IB_ACCESS_REMOTE_READ); | ||
| 1612 | struct ib_mw_bind param; | ||
| 1613 | int rc; | ||
| 1614 | |||
| 1615 | *nsegs = 1; | ||
| 1616 | rpcrdma_map_one(ia, seg, writing); | ||
| 1617 | param.mr = ia->ri_bind_mem; | ||
| 1618 | param.wr_id = 0ULL; /* no send cookie */ | ||
| 1619 | param.addr = seg->mr_dma; | ||
| 1620 | param.length = seg->mr_len; | ||
| 1621 | param.send_flags = 0; | ||
| 1622 | param.mw_access_flags = mem_priv; | ||
| 1623 | |||
| 1624 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1625 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
| 1626 | if (rc) { | ||
| 1627 | dprintk("RPC: %s: failed ib_bind_mw " | ||
| 1628 | "%u@0x%llx status %i\n", | ||
| 1629 | __func__, seg->mr_len, | ||
| 1630 | (unsigned long long)seg->mr_dma, rc); | ||
| 1631 | rpcrdma_unmap_one(ia, seg); | ||
| 1632 | } else { | ||
| 1633 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
| 1634 | seg->mr_base = param.addr; | ||
| 1635 | seg->mr_nsegs = 1; | ||
| 1636 | } | ||
| 1637 | return rc; | ||
| 1638 | } | ||
| 1639 | |||
| 1640 | static int | ||
| 1641 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1642 | struct rpcrdma_ia *ia, | ||
| 1643 | struct rpcrdma_xprt *r_xprt, void **r) | ||
| 1644 | { | ||
| 1645 | struct ib_mw_bind param; | ||
| 1646 | LIST_HEAD(l); | ||
| 1647 | int rc; | ||
| 1648 | |||
| 1649 | BUG_ON(seg->mr_nsegs != 1); | ||
| 1650 | param.mr = ia->ri_bind_mem; | ||
| 1651 | param.addr = 0ULL; /* unbind */ | ||
| 1652 | param.length = 0; | ||
| 1653 | param.mw_access_flags = 0; | ||
| 1654 | if (*r) { | ||
| 1655 | param.wr_id = (u64) (unsigned long) *r; | ||
| 1656 | param.send_flags = IB_SEND_SIGNALED; | ||
| 1657 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
| 1658 | } else { | ||
| 1659 | param.wr_id = 0ULL; | ||
| 1660 | param.send_flags = 0; | ||
| 1661 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1662 | } | ||
| 1663 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
| 1664 | rpcrdma_unmap_one(ia, seg); | ||
| 1665 | if (rc) | ||
| 1666 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
| 1667 | " status %i\n", __func__, rc); | ||
| 1668 | else | ||
| 1669 | *r = NULL; /* will upcall on completion */ | ||
| 1670 | return rc; | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | static int | ||
| 1674 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1675 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
| 1676 | { | ||
| 1677 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1678 | IB_ACCESS_REMOTE_READ); | ||
| 1679 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1680 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1681 | int len, i, rc = 0; | ||
| 1682 | |||
| 1683 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1684 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1685 | for (len = 0, i = 0; i < *nsegs;) { | ||
| 1686 | rpcrdma_map_one(ia, seg, writing); | ||
| 1687 | ipb[i].addr = seg->mr_dma; | ||
| 1688 | ipb[i].size = seg->mr_len; | ||
| 1689 | len += seg->mr_len; | ||
| 1690 | ++seg; | ||
| 1691 | ++i; | ||
| 1692 | /* Check for holes */ | ||
| 1693 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1694 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
| 1695 | break; | ||
| 1696 | } | ||
| 1697 | seg1->mr_base = seg1->mr_dma; | ||
| 1698 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
| 1699 | ipb, i, mem_priv, &seg1->mr_base); | ||
| 1700 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
| 1701 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
| 1702 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
| 1703 | "%u@0x%llx (%d)... status %i\n", | ||
| 1704 | __func__, len, | ||
| 1705 | (unsigned long long)seg1->mr_dma, i, rc); | ||
| 1706 | while (i--) | ||
| 1707 | rpcrdma_unmap_one(ia, --seg); | ||
| 1708 | } else { | ||
| 1709 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
| 1710 | seg1->mr_nsegs = i; | ||
| 1711 | seg1->mr_len = len; | ||
| 1712 | } | ||
| 1713 | *nsegs = i; | ||
| 1714 | return rc; | ||
| 1715 | } | ||
| 1716 | |||
| 1717 | static int | ||
| 1718 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1719 | struct rpcrdma_ia *ia) | ||
| 1720 | { | ||
| 1721 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1722 | int rc; | ||
| 1723 | |||
| 1724 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
| 1725 | seg1->mr_chunk.rl_mr = NULL; | ||
| 1726 | while (seg1->mr_nsegs--) | ||
| 1727 | rpcrdma_unmap_one(ia, seg++); | ||
| 1728 | if (rc) | ||
| 1729 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
| 1730 | " status %i\n", __func__, rc); | ||
| 1731 | return rc; | ||
| 1732 | } | ||
| 1733 | |||
| 1332 | int | 1734 | int |
| 1333 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1735 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
| 1334 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1736 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
| 1335 | { | 1737 | { |
| 1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1738 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 1337 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1338 | IB_ACCESS_REMOTE_READ); | ||
| 1339 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1340 | int i; | ||
| 1341 | int rc = 0; | 1739 | int rc = 0; |
| 1342 | 1740 | ||
| 1343 | switch (ia->ri_memreg_strategy) { | 1741 | switch (ia->ri_memreg_strategy) { |
| @@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
| 1352 | break; | 1750 | break; |
| 1353 | #endif | 1751 | #endif |
| 1354 | 1752 | ||
| 1355 | /* Registration using fast memory registration */ | 1753 | /* Registration using frmr registration */ |
| 1754 | case RPCRDMA_FRMR: | ||
| 1755 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
| 1756 | break; | ||
| 1757 | |||
| 1758 | /* Registration using fmr memory registration */ | ||
| 1356 | case RPCRDMA_MTHCAFMR: | 1759 | case RPCRDMA_MTHCAFMR: |
| 1357 | { | 1760 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
| 1358 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1359 | int len, pageoff = offset_in_page(seg->mr_offset); | ||
| 1360 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 1361 | seg1->mr_len += pageoff; | ||
| 1362 | len = -pageoff; | ||
| 1363 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1364 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1365 | for (i = 0; i < nsegs;) { | ||
| 1366 | rpcrdma_map_one(ia, seg, writing); | ||
| 1367 | physaddrs[i] = seg->mr_dma; | ||
| 1368 | len += seg->mr_len; | ||
| 1369 | ++seg; | ||
| 1370 | ++i; | ||
| 1371 | /* Check for holes */ | ||
| 1372 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1373 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
| 1374 | break; | ||
| 1375 | } | ||
| 1376 | nsegs = i; | ||
| 1377 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
| 1378 | physaddrs, nsegs, seg1->mr_dma); | ||
| 1379 | if (rc) { | ||
| 1380 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
| 1381 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
| 1382 | len, (unsigned long long)seg1->mr_dma, | ||
| 1383 | pageoff, nsegs, rc); | ||
| 1384 | while (nsegs--) | ||
| 1385 | rpcrdma_unmap_one(ia, --seg); | ||
| 1386 | } else { | ||
| 1387 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
| 1388 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 1389 | seg1->mr_nsegs = nsegs; | ||
| 1390 | seg1->mr_len = len; | ||
| 1391 | } | ||
| 1392 | } | ||
| 1393 | break; | 1761 | break; |
| 1394 | 1762 | ||
| 1395 | /* Registration using memory windows */ | 1763 | /* Registration using memory windows */ |
| 1396 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1764 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 1397 | case RPCRDMA_MEMWINDOWS: | 1765 | case RPCRDMA_MEMWINDOWS: |
| 1398 | { | 1766 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); |
| 1399 | struct ib_mw_bind param; | ||
| 1400 | rpcrdma_map_one(ia, seg, writing); | ||
| 1401 | param.mr = ia->ri_bind_mem; | ||
| 1402 | param.wr_id = 0ULL; /* no send cookie */ | ||
| 1403 | param.addr = seg->mr_dma; | ||
| 1404 | param.length = seg->mr_len; | ||
| 1405 | param.send_flags = 0; | ||
| 1406 | param.mw_access_flags = mem_priv; | ||
| 1407 | |||
| 1408 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1409 | rc = ib_bind_mw(ia->ri_id->qp, | ||
| 1410 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
| 1411 | if (rc) { | ||
| 1412 | dprintk("RPC: %s: failed ib_bind_mw " | ||
| 1413 | "%u@0x%llx status %i\n", | ||
| 1414 | __func__, seg->mr_len, | ||
| 1415 | (unsigned long long)seg->mr_dma, rc); | ||
| 1416 | rpcrdma_unmap_one(ia, seg); | ||
| 1417 | } else { | ||
| 1418 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
| 1419 | seg->mr_base = param.addr; | ||
| 1420 | seg->mr_nsegs = 1; | ||
| 1421 | nsegs = 1; | ||
| 1422 | } | ||
| 1423 | } | ||
| 1424 | break; | 1767 | break; |
| 1425 | 1768 | ||
| 1426 | /* Default registration each time */ | 1769 | /* Default registration each time */ |
| 1427 | default: | 1770 | default: |
| 1428 | { | 1771 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); |
| 1429 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1430 | int len = 0; | ||
| 1431 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1432 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1433 | for (i = 0; i < nsegs;) { | ||
| 1434 | rpcrdma_map_one(ia, seg, writing); | ||
| 1435 | ipb[i].addr = seg->mr_dma; | ||
| 1436 | ipb[i].size = seg->mr_len; | ||
| 1437 | len += seg->mr_len; | ||
| 1438 | ++seg; | ||
| 1439 | ++i; | ||
| 1440 | /* Check for holes */ | ||
| 1441 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1442 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
| 1443 | break; | ||
| 1444 | } | ||
| 1445 | nsegs = i; | ||
| 1446 | seg1->mr_base = seg1->mr_dma; | ||
| 1447 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
| 1448 | ipb, nsegs, mem_priv, &seg1->mr_base); | ||
| 1449 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
| 1450 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
| 1451 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
| 1452 | "%u@0x%llx (%d)... status %i\n", | ||
| 1453 | __func__, len, | ||
| 1454 | (unsigned long long)seg1->mr_dma, nsegs, rc); | ||
| 1455 | while (nsegs--) | ||
| 1456 | rpcrdma_unmap_one(ia, --seg); | ||
| 1457 | } else { | ||
| 1458 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
| 1459 | seg1->mr_nsegs = nsegs; | ||
| 1460 | seg1->mr_len = len; | ||
| 1461 | } | ||
| 1462 | } | ||
| 1463 | break; | 1772 | break; |
| 1464 | } | 1773 | } |
| 1465 | if (rc) | 1774 | if (rc) |
| @@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1473 | struct rpcrdma_xprt *r_xprt, void *r) | 1782 | struct rpcrdma_xprt *r_xprt, void *r) |
| 1474 | { | 1783 | { |
| 1475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1784 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 1476 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1477 | int nsegs = seg->mr_nsegs, rc; | 1785 | int nsegs = seg->mr_nsegs, rc; |
| 1478 | 1786 | ||
| 1479 | switch (ia->ri_memreg_strategy) { | 1787 | switch (ia->ri_memreg_strategy) { |
| @@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1486 | break; | 1794 | break; |
| 1487 | #endif | 1795 | #endif |
| 1488 | 1796 | ||
| 1797 | case RPCRDMA_FRMR: | ||
| 1798 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
| 1799 | break; | ||
| 1800 | |||
| 1489 | case RPCRDMA_MTHCAFMR: | 1801 | case RPCRDMA_MTHCAFMR: |
| 1490 | { | 1802 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
| 1491 | LIST_HEAD(l); | ||
| 1492 | list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); | ||
| 1493 | rc = ib_unmap_fmr(&l); | ||
| 1494 | while (seg1->mr_nsegs--) | ||
| 1495 | rpcrdma_unmap_one(ia, seg++); | ||
| 1496 | } | ||
| 1497 | if (rc) | ||
| 1498 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
| 1499 | " status %i\n", __func__, rc); | ||
| 1500 | break; | 1803 | break; |
| 1501 | 1804 | ||
| 1502 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1805 | case RPCRDMA_MEMWINDOWS_ASYNC: |
| 1503 | case RPCRDMA_MEMWINDOWS: | 1806 | case RPCRDMA_MEMWINDOWS: |
| 1504 | { | 1807 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); |
| 1505 | struct ib_mw_bind param; | ||
| 1506 | BUG_ON(nsegs != 1); | ||
| 1507 | param.mr = ia->ri_bind_mem; | ||
| 1508 | param.addr = 0ULL; /* unbind */ | ||
| 1509 | param.length = 0; | ||
| 1510 | param.mw_access_flags = 0; | ||
| 1511 | if (r) { | ||
| 1512 | param.wr_id = (u64) (unsigned long) r; | ||
| 1513 | param.send_flags = IB_SEND_SIGNALED; | ||
| 1514 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
| 1515 | } else { | ||
| 1516 | param.wr_id = 0ULL; | ||
| 1517 | param.send_flags = 0; | ||
| 1518 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1519 | } | ||
| 1520 | rc = ib_bind_mw(ia->ri_id->qp, | ||
| 1521 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
| 1522 | rpcrdma_unmap_one(ia, seg); | ||
| 1523 | } | ||
| 1524 | if (rc) | ||
| 1525 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
| 1526 | " status %i\n", __func__, rc); | ||
| 1527 | else | ||
| 1528 | r = NULL; /* will upcall on completion */ | ||
| 1529 | break; | 1808 | break; |
| 1530 | 1809 | ||
| 1531 | default: | 1810 | default: |
| 1532 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | 1811 | rc = rpcrdma_deregister_default_external(seg, ia); |
| 1533 | seg1->mr_chunk.rl_mr = NULL; | ||
| 1534 | while (seg1->mr_nsegs--) | ||
| 1535 | rpcrdma_unmap_one(ia, seg++); | ||
| 1536 | if (rc) | ||
| 1537 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
| 1538 | " status %i\n", __func__, rc); | ||
| 1539 | break; | 1812 | break; |
| 1540 | } | 1813 | } |
| 1541 | if (r) { | 1814 | if (r) { |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
| @@ -51,6 +51,9 @@ | |||
| 51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | 51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ |
| 52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | 52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ |
| 53 | 53 | ||
| 54 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ | ||
| 55 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
| 56 | |||
| 54 | /* | 57 | /* |
| 55 | * Interface Adapter -- one per transport instance | 58 | * Interface Adapter -- one per transport instance |
| 56 | */ | 59 | */ |
| @@ -58,6 +61,8 @@ struct rpcrdma_ia { | |||
| 58 | struct rdma_cm_id *ri_id; | 61 | struct rdma_cm_id *ri_id; |
| 59 | struct ib_pd *ri_pd; | 62 | struct ib_pd *ri_pd; |
| 60 | struct ib_mr *ri_bind_mem; | 63 | struct ib_mr *ri_bind_mem; |
| 64 | u32 ri_dma_lkey; | ||
| 65 | int ri_have_dma_lkey; | ||
| 61 | struct completion ri_done; | 66 | struct completion ri_done; |
| 62 | int ri_async_rc; | 67 | int ri_async_rc; |
| 63 | enum rpcrdma_memreg ri_memreg_strategy; | 68 | enum rpcrdma_memreg ri_memreg_strategy; |
| @@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
| 156 | union { | 161 | union { |
| 157 | struct ib_mw *mw; | 162 | struct ib_mw *mw; |
| 158 | struct ib_fmr *fmr; | 163 | struct ib_fmr *fmr; |
| 164 | struct { | ||
| 165 | struct ib_fast_reg_page_list *fr_pgl; | ||
| 166 | struct ib_mr *fr_mr; | ||
| 167 | } frmr; | ||
| 159 | } r; | 168 | } r; |
| 160 | struct list_head mw_list; | 169 | struct list_head mw_list; |
| 161 | } *rl_mw; | 170 | } *rl_mw; |
| @@ -175,6 +184,7 @@ struct rpcrdma_req { | |||
| 175 | size_t rl_size; /* actual length of buffer */ | 184 | size_t rl_size; /* actual length of buffer */ |
| 176 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 185 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
| 177 | unsigned int rl_nchunks; /* non-zero if chunks */ | 186 | unsigned int rl_nchunks; /* non-zero if chunks */ |
| 187 | unsigned int rl_connect_cookie; /* retry detection */ | ||
| 178 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 188 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
| 179 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 189 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
| 180 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ | 190 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ |
| @@ -198,7 +208,7 @@ struct rpcrdma_buffer { | |||
| 198 | atomic_t rb_credits; /* most recent server credits */ | 208 | atomic_t rb_credits; /* most recent server credits */ |
| 199 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | 209 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ |
| 200 | int rb_max_requests;/* client max requests */ | 210 | int rb_max_requests;/* client max requests */ |
| 201 | struct list_head rb_mws; /* optional memory windows/fmrs */ | 211 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
| 202 | int rb_send_index; | 212 | int rb_send_index; |
| 203 | struct rpcrdma_req **rb_send_bufs; | 213 | struct rpcrdma_req **rb_send_bufs; |
| 204 | int rb_recv_index; | 214 | int rb_recv_index; |
| @@ -273,6 +283,11 @@ struct rpcrdma_xprt { | |||
| 273 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) | 283 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) |
| 274 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 284 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
| 275 | 285 | ||
| 286 | /* Setting this to 0 ensures interoperability with early servers. | ||
| 287 | * Setting this to 1 enhances certain unaligned read/write performance. | ||
| 288 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | ||
| 289 | extern int xprt_rdma_pad_optimize; | ||
| 290 | |||
| 276 | /* | 291 | /* |
| 277 | * Interface Adapter calls - xprtrdma/verbs.c | 292 | * Interface Adapter calls - xprtrdma/verbs.c |
| 278 | */ | 293 | */ |
