diff options
57 files changed, 5467 insertions, 1123 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c323778270ff..fdd6dbcf864e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1083,6 +1083,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1083 | [NFS] set the maximum lifetime for idmapper cache | 1083 | [NFS] set the maximum lifetime for idmapper cache |
1084 | entries. | 1084 | entries. |
1085 | 1085 | ||
1086 | nfs.enable_ino64= | ||
1087 | [NFS] enable 64-bit inode numbers. | ||
1088 | If zero, the NFS client will fake up a 32-bit inode | ||
1089 | number for the readdir() and stat() syscalls instead | ||
1090 | of returning the full 64-bit number. | ||
1091 | The default is to return 64-bit inode numbers. | ||
1092 | |||
1086 | nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels | 1093 | nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels |
1087 | 1094 | ||
1088 | no387 [BUGS=X86-32] Tells the kernel to use the 387 maths | 1095 | no387 [BUGS=X86-32] Tells the kernel to use the 387 maths |
diff --git a/fs/Kconfig b/fs/Kconfig index bb02b39380a3..815d201d8600 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1755,6 +1755,14 @@ config SUNRPC | |||
1755 | config SUNRPC_GSS | 1755 | config SUNRPC_GSS |
1756 | tristate | 1756 | tristate |
1757 | 1757 | ||
1758 | config SUNRPC_XPRT_RDMA | ||
1759 | tristate "RDMA transport for sunrpc (EXPERIMENTAL)" | ||
1760 | depends on SUNRPC && INFINIBAND && EXPERIMENTAL | ||
1761 | default m | ||
1762 | help | ||
1763 | Adds a client RPC transport for supporting kernel NFS over RDMA | ||
1764 | mounts, including Infiniband and iWARP. Experimental. | ||
1765 | |||
1758 | config SUNRPC_BIND34 | 1766 | config SUNRPC_BIND34 |
1759 | bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" | 1767 | bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" |
1760 | depends on SUNRPC && EXPERIMENTAL | 1768 | depends on SUNRPC && EXPERIMENTAL |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3353ed8421a7..908b23fadd05 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/utsname.h> | 10 | #include <linux/utsname.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
13 | #include <linux/sunrpc/xprtsock.h> | ||
13 | #include <linux/sunrpc/svc.h> | 14 | #include <linux/sunrpc/svc.h> |
14 | #include <linux/lockd/lockd.h> | 15 | #include <linux/lockd/lockd.h> |
15 | #include <linux/lockd/sm_inter.h> | 16 | #include <linux/lockd/sm_inter.h> |
@@ -132,7 +133,7 @@ nsm_create(void) | |||
132 | .sin_port = 0, | 133 | .sin_port = 0, |
133 | }; | 134 | }; |
134 | struct rpc_create_args args = { | 135 | struct rpc_create_args args = { |
135 | .protocol = IPPROTO_UDP, | 136 | .protocol = XPRT_TRANSPORT_UDP, |
136 | .address = (struct sockaddr *)&sin, | 137 | .address = (struct sockaddr *)&sin, |
137 | .addrsize = sizeof(sin), | 138 | .addrsize = sizeof(sin), |
138 | .servername = "localhost", | 139 | .servername = "localhost", |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 5316e307a49d..633653bff944 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -62,8 +62,9 @@ static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c) | |||
62 | } | 62 | } |
63 | else | 63 | else |
64 | { | 64 | { |
65 | printk(KERN_NOTICE | 65 | dprintk("lockd: bad cookie size %d (only cookies under " |
66 | "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); | 66 | "%d bytes are supported.)\n", |
67 | len, NLM_MAXCOOKIELEN); | ||
67 | return NULL; | 68 | return NULL; |
68 | } | 69 | } |
69 | return p; | 70 | return p; |
@@ -84,8 +85,7 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) | |||
84 | unsigned int len; | 85 | unsigned int len; |
85 | 86 | ||
86 | if ((len = ntohl(*p++)) != NFS2_FHSIZE) { | 87 | if ((len = ntohl(*p++)) != NFS2_FHSIZE) { |
87 | printk(KERN_NOTICE | 88 | dprintk("lockd: bad fhandle size %d (should be %d)\n", |
88 | "lockd: bad fhandle size %d (should be %d)\n", | ||
89 | len, NFS2_FHSIZE); | 89 | len, NFS2_FHSIZE); |
90 | return NULL; | 90 | return NULL; |
91 | } | 91 | } |
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 846fc1d639dd..43ff9397e6c6 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c | |||
@@ -64,8 +64,9 @@ nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c) | |||
64 | } | 64 | } |
65 | else | 65 | else |
66 | { | 66 | { |
67 | printk(KERN_NOTICE | 67 | dprintk("lockd: bad cookie size %d (only cookies under " |
68 | "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); | 68 | "%d bytes are supported.)\n", |
69 | len, NLM_MAXCOOKIELEN); | ||
69 | return NULL; | 70 | return NULL; |
70 | } | 71 | } |
71 | return p; | 72 | return p; |
@@ -86,8 +87,7 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f) | |||
86 | memset(f->data, 0, sizeof(f->data)); | 87 | memset(f->data, 0, sizeof(f->data)); |
87 | f->size = ntohl(*p++); | 88 | f->size = ntohl(*p++); |
88 | if (f->size > NFS_MAXFHSIZE) { | 89 | if (f->size > NFS_MAXFHSIZE) { |
89 | printk(KERN_NOTICE | 90 | dprintk("lockd: bad fhandle size %d (should be <=%d)\n", |
90 | "lockd: bad fhandle size %d (should be <=%d)\n", | ||
91 | f->size, NFS_MAXFHSIZE); | 91 | f->size, NFS_MAXFHSIZE); |
92 | return NULL; | 92 | return NULL; |
93 | } | 93 | } |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b55cb236cf74..df0f41e09885 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
16 | nfs4namespace.o | 16 | nfs4namespace.o |
17 | nfs-$(CONFIG_NFS_DIRECTIO) += direct.o | 17 | nfs-$(CONFIG_NFS_DIRECTIO) += direct.o |
18 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 18 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
19 | nfs-objs := $(nfs-y) | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a204484072f3..a532ee12740a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -23,6 +23,8 @@ | |||
23 | #include <linux/sunrpc/clnt.h> | 23 | #include <linux/sunrpc/clnt.h> |
24 | #include <linux/sunrpc/stats.h> | 24 | #include <linux/sunrpc/stats.h> |
25 | #include <linux/sunrpc/metrics.h> | 25 | #include <linux/sunrpc/metrics.h> |
26 | #include <linux/sunrpc/xprtsock.h> | ||
27 | #include <linux/sunrpc/xprtrdma.h> | ||
26 | #include <linux/nfs_fs.h> | 28 | #include <linux/nfs_fs.h> |
27 | #include <linux/nfs_mount.h> | 29 | #include <linux/nfs_mount.h> |
28 | #include <linux/nfs4_mount.h> | 30 | #include <linux/nfs4_mount.h> |
@@ -340,7 +342,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
340 | to->to_retries = 2; | 342 | to->to_retries = 2; |
341 | 343 | ||
342 | switch (proto) { | 344 | switch (proto) { |
343 | case IPPROTO_TCP: | 345 | case XPRT_TRANSPORT_TCP: |
346 | case XPRT_TRANSPORT_RDMA: | ||
344 | if (!to->to_initval) | 347 | if (!to->to_initval) |
345 | to->to_initval = 60 * HZ; | 348 | to->to_initval = 60 * HZ; |
346 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) | 349 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) |
@@ -349,7 +352,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
349 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); | 352 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); |
350 | to->to_exponential = 0; | 353 | to->to_exponential = 0; |
351 | break; | 354 | break; |
352 | case IPPROTO_UDP: | 355 | case XPRT_TRANSPORT_UDP: |
353 | default: | 356 | default: |
354 | if (!to->to_initval) | 357 | if (!to->to_initval) |
355 | to->to_initval = 11 * HZ / 10; | 358 | to->to_initval = 11 * HZ / 10; |
@@ -501,9 +504,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t | |||
501 | /* | 504 | /* |
502 | * Initialise an NFS2 or NFS3 client | 505 | * Initialise an NFS2 or NFS3 client |
503 | */ | 506 | */ |
504 | static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) | 507 | static int nfs_init_client(struct nfs_client *clp, |
508 | const struct nfs_parsed_mount_data *data) | ||
505 | { | 509 | { |
506 | int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; | ||
507 | int error; | 510 | int error; |
508 | 511 | ||
509 | if (clp->cl_cons_state == NFS_CS_READY) { | 512 | if (clp->cl_cons_state == NFS_CS_READY) { |
@@ -522,8 +525,8 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data * | |||
522 | * Create a client RPC handle for doing FSSTAT with UNIX auth only | 525 | * Create a client RPC handle for doing FSSTAT with UNIX auth only |
523 | * - RFC 2623, sec 2.3.2 | 526 | * - RFC 2623, sec 2.3.2 |
524 | */ | 527 | */ |
525 | error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, | 528 | error = nfs_create_rpc_client(clp, data->nfs_server.protocol, |
526 | RPC_AUTH_UNIX, 0); | 529 | data->timeo, data->retrans, RPC_AUTH_UNIX, 0); |
527 | if (error < 0) | 530 | if (error < 0) |
528 | goto error; | 531 | goto error; |
529 | nfs_mark_client_ready(clp, NFS_CS_READY); | 532 | nfs_mark_client_ready(clp, NFS_CS_READY); |
@@ -538,7 +541,8 @@ error: | |||
538 | /* | 541 | /* |
539 | * Create a version 2 or 3 client | 542 | * Create a version 2 or 3 client |
540 | */ | 543 | */ |
541 | static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) | 544 | static int nfs_init_server(struct nfs_server *server, |
545 | const struct nfs_parsed_mount_data *data) | ||
542 | { | 546 | { |
543 | struct nfs_client *clp; | 547 | struct nfs_client *clp; |
544 | int error, nfsvers = 2; | 548 | int error, nfsvers = 2; |
@@ -551,7 +555,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat | |||
551 | #endif | 555 | #endif |
552 | 556 | ||
553 | /* Allocate or find a client reference we can use */ | 557 | /* Allocate or find a client reference we can use */ |
554 | clp = nfs_get_client(data->hostname, &data->addr, nfsvers); | 558 | clp = nfs_get_client(data->nfs_server.hostname, |
559 | &data->nfs_server.address, nfsvers); | ||
555 | if (IS_ERR(clp)) { | 560 | if (IS_ERR(clp)) { |
556 | dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); | 561 | dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); |
557 | return PTR_ERR(clp); | 562 | return PTR_ERR(clp); |
@@ -581,7 +586,7 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat | |||
581 | if (error < 0) | 586 | if (error < 0) |
582 | goto error; | 587 | goto error; |
583 | 588 | ||
584 | error = nfs_init_server_rpcclient(server, data->pseudoflavor); | 589 | error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); |
585 | if (error < 0) | 590 | if (error < 0) |
586 | goto error; | 591 | goto error; |
587 | 592 | ||
@@ -760,7 +765,7 @@ void nfs_free_server(struct nfs_server *server) | |||
760 | * Create a version 2 or 3 volume record | 765 | * Create a version 2 or 3 volume record |
761 | * - keyed on server and FSID | 766 | * - keyed on server and FSID |
762 | */ | 767 | */ |
763 | struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, | 768 | struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, |
764 | struct nfs_fh *mntfh) | 769 | struct nfs_fh *mntfh) |
765 | { | 770 | { |
766 | struct nfs_server *server; | 771 | struct nfs_server *server; |
@@ -906,7 +911,7 @@ error: | |||
906 | * Create a version 4 volume record | 911 | * Create a version 4 volume record |
907 | */ | 912 | */ |
908 | static int nfs4_init_server(struct nfs_server *server, | 913 | static int nfs4_init_server(struct nfs_server *server, |
909 | const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) | 914 | const struct nfs_parsed_mount_data *data) |
910 | { | 915 | { |
911 | int error; | 916 | int error; |
912 | 917 | ||
@@ -926,7 +931,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
926 | server->acdirmin = data->acdirmin * HZ; | 931 | server->acdirmin = data->acdirmin * HZ; |
927 | server->acdirmax = data->acdirmax * HZ; | 932 | server->acdirmax = data->acdirmax * HZ; |
928 | 933 | ||
929 | error = nfs_init_server_rpcclient(server, authflavour); | 934 | error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); |
930 | 935 | ||
931 | /* Done */ | 936 | /* Done */ |
932 | dprintk("<-- nfs4_init_server() = %d\n", error); | 937 | dprintk("<-- nfs4_init_server() = %d\n", error); |
@@ -937,12 +942,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
937 | * Create a version 4 volume record | 942 | * Create a version 4 volume record |
938 | * - keyed on server and FSID | 943 | * - keyed on server and FSID |
939 | */ | 944 | */ |
940 | struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, | 945 | struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, |
941 | const char *hostname, | ||
942 | const struct sockaddr_in *addr, | ||
943 | const char *mntpath, | ||
944 | const char *ip_addr, | ||
945 | rpc_authflavor_t authflavour, | ||
946 | struct nfs_fh *mntfh) | 946 | struct nfs_fh *mntfh) |
947 | { | 947 | { |
948 | struct nfs_fattr fattr; | 948 | struct nfs_fattr fattr; |
@@ -956,13 +956,18 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, | |||
956 | return ERR_PTR(-ENOMEM); | 956 | return ERR_PTR(-ENOMEM); |
957 | 957 | ||
958 | /* Get a client record */ | 958 | /* Get a client record */ |
959 | error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour, | 959 | error = nfs4_set_client(server, |
960 | data->proto, data->timeo, data->retrans); | 960 | data->nfs_server.hostname, |
961 | &data->nfs_server.address, | ||
962 | data->client_address, | ||
963 | data->auth_flavors[0], | ||
964 | data->nfs_server.protocol, | ||
965 | data->timeo, data->retrans); | ||
961 | if (error < 0) | 966 | if (error < 0) |
962 | goto error; | 967 | goto error; |
963 | 968 | ||
964 | /* set up the general RPC client */ | 969 | /* set up the general RPC client */ |
965 | error = nfs4_init_server(server, data, authflavour); | 970 | error = nfs4_init_server(server, data); |
966 | if (error < 0) | 971 | if (error < 0) |
967 | goto error; | 972 | goto error; |
968 | 973 | ||
@@ -971,7 +976,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, | |||
971 | BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); | 976 | BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); |
972 | 977 | ||
973 | /* Probe the root fh to retrieve its FSID */ | 978 | /* Probe the root fh to retrieve its FSID */ |
974 | error = nfs4_path_walk(server, mntfh, mntpath); | 979 | error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); |
975 | if (error < 0) | 980 | if (error < 0) |
976 | goto error; | 981 | goto error; |
977 | 982 | ||
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c55a761c22bb..af8b235d405d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ | |||
52 | for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { | 52 | for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { |
53 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 53 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
54 | continue; | 54 | continue; |
55 | if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) | 55 | if (nfs_file_open_context(fl->fl_file) != ctx) |
56 | continue; | 56 | continue; |
57 | status = nfs4_lock_delegation_recall(state, fl); | 57 | status = nfs4_lock_delegation_recall(state, fl); |
58 | if (status >= 0) | 58 | if (status >= 0) |
@@ -109,6 +109,7 @@ again: | |||
109 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) | 109 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) |
110 | { | 110 | { |
111 | struct nfs_delegation *delegation = NFS_I(inode)->delegation; | 111 | struct nfs_delegation *delegation = NFS_I(inode)->delegation; |
112 | struct rpc_cred *oldcred; | ||
112 | 113 | ||
113 | if (delegation == NULL) | 114 | if (delegation == NULL) |
114 | return; | 115 | return; |
@@ -116,11 +117,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st | |||
116 | sizeof(delegation->stateid.data)); | 117 | sizeof(delegation->stateid.data)); |
117 | delegation->type = res->delegation_type; | 118 | delegation->type = res->delegation_type; |
118 | delegation->maxsize = res->maxsize; | 119 | delegation->maxsize = res->maxsize; |
119 | put_rpccred(cred); | 120 | oldcred = delegation->cred; |
120 | delegation->cred = get_rpccred(cred); | 121 | delegation->cred = get_rpccred(cred); |
121 | delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; | 122 | delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; |
122 | NFS_I(inode)->delegation_state = delegation->type; | 123 | NFS_I(inode)->delegation_state = delegation->type; |
123 | smp_wmb(); | 124 | smp_wmb(); |
125 | put_rpccred(oldcred); | ||
124 | } | 126 | } |
125 | 127 | ||
126 | /* | 128 | /* |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e4a04d16b8b0..8ec7fbd8240c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -200,9 +200,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
200 | desc->timestamp = timestamp; | 200 | desc->timestamp = timestamp; |
201 | desc->timestamp_valid = 1; | 201 | desc->timestamp_valid = 1; |
202 | SetPageUptodate(page); | 202 | SetPageUptodate(page); |
203 | spin_lock(&inode->i_lock); | ||
204 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | ||
205 | spin_unlock(&inode->i_lock); | ||
206 | /* Ensure consistent page alignment of the data. | 203 | /* Ensure consistent page alignment of the data. |
207 | * Note: assumes we have exclusive access to this mapping either | 204 | * Note: assumes we have exclusive access to this mapping either |
208 | * through inode->i_mutex or some other mechanism. | 205 | * through inode->i_mutex or some other mechanism. |
@@ -214,9 +211,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
214 | unlock_page(page); | 211 | unlock_page(page); |
215 | return 0; | 212 | return 0; |
216 | error: | 213 | error: |
217 | SetPageError(page); | ||
218 | unlock_page(page); | 214 | unlock_page(page); |
219 | nfs_zap_caches(inode); | ||
220 | desc->error = error; | 215 | desc->error = error; |
221 | return -EIO; | 216 | return -EIO; |
222 | } | 217 | } |
@@ -407,7 +402,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
407 | struct file *file = desc->file; | 402 | struct file *file = desc->file; |
408 | struct nfs_entry *entry = desc->entry; | 403 | struct nfs_entry *entry = desc->entry; |
409 | struct dentry *dentry = NULL; | 404 | struct dentry *dentry = NULL; |
410 | unsigned long fileid; | 405 | u64 fileid; |
411 | int loop_count = 0, | 406 | int loop_count = 0, |
412 | res; | 407 | res; |
413 | 408 | ||
@@ -418,7 +413,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
418 | unsigned d_type = DT_UNKNOWN; | 413 | unsigned d_type = DT_UNKNOWN; |
419 | /* Note: entry->prev_cookie contains the cookie for | 414 | /* Note: entry->prev_cookie contains the cookie for |
420 | * retrieving the current dirent on the server */ | 415 | * retrieving the current dirent on the server */ |
421 | fileid = nfs_fileid_to_ino_t(entry->ino); | 416 | fileid = entry->ino; |
422 | 417 | ||
423 | /* Get a dentry if we have one */ | 418 | /* Get a dentry if we have one */ |
424 | if (dentry != NULL) | 419 | if (dentry != NULL) |
@@ -428,11 +423,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
428 | /* Use readdirplus info */ | 423 | /* Use readdirplus info */ |
429 | if (dentry != NULL && dentry->d_inode != NULL) { | 424 | if (dentry != NULL && dentry->d_inode != NULL) { |
430 | d_type = dt_type(dentry->d_inode); | 425 | d_type = dt_type(dentry->d_inode); |
431 | fileid = dentry->d_inode->i_ino; | 426 | fileid = NFS_FILEID(dentry->d_inode); |
432 | } | 427 | } |
433 | 428 | ||
434 | res = filldir(dirent, entry->name, entry->len, | 429 | res = filldir(dirent, entry->name, entry->len, |
435 | file->f_pos, fileid, d_type); | 430 | file->f_pos, nfs_compat_user_ino64(fileid), |
431 | d_type); | ||
436 | if (res < 0) | 432 | if (res < 0) |
437 | break; | 433 | break; |
438 | file->f_pos++; | 434 | file->f_pos++; |
@@ -490,9 +486,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
490 | page, | 486 | page, |
491 | NFS_SERVER(inode)->dtsize, | 487 | NFS_SERVER(inode)->dtsize, |
492 | desc->plus); | 488 | desc->plus); |
493 | spin_lock(&inode->i_lock); | ||
494 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | ||
495 | spin_unlock(&inode->i_lock); | ||
496 | desc->page = page; | 489 | desc->page = page; |
497 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 490 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
498 | if (desc->error >= 0) { | 491 | if (desc->error >= 0) { |
@@ -558,7 +551,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
558 | memset(desc, 0, sizeof(*desc)); | 551 | memset(desc, 0, sizeof(*desc)); |
559 | 552 | ||
560 | desc->file = filp; | 553 | desc->file = filp; |
561 | desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; | 554 | desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; |
562 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 555 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
563 | desc->plus = NFS_USE_READDIRPLUS(inode); | 556 | desc->plus = NFS_USE_READDIRPLUS(inode); |
564 | 557 | ||
@@ -623,7 +616,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
623 | } | 616 | } |
624 | if (offset != filp->f_pos) { | 617 | if (offset != filp->f_pos) { |
625 | filp->f_pos = offset; | 618 | filp->f_pos = offset; |
626 | ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; | 619 | nfs_file_open_context(filp)->dir_cookie = 0; |
627 | } | 620 | } |
628 | out: | 621 | out: |
629 | mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); | 622 | mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); |
@@ -650,36 +643,18 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
650 | */ | 643 | */ |
651 | static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | 644 | static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) |
652 | { | 645 | { |
653 | unsigned long verf; | ||
654 | |||
655 | if (IS_ROOT(dentry)) | 646 | if (IS_ROOT(dentry)) |
656 | return 1; | 647 | return 1; |
657 | verf = dentry->d_time; | 648 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) |
658 | if (nfs_caches_unstable(dir) | 649 | return 0; |
659 | || verf != NFS_I(dir)->cache_change_attribute) | 650 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ |
651 | if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) | ||
652 | return 0; | ||
653 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) | ||
660 | return 0; | 654 | return 0; |
661 | return 1; | 655 | return 1; |
662 | } | 656 | } |
663 | 657 | ||
664 | static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) | ||
665 | { | ||
666 | dentry->d_time = verf; | ||
667 | } | ||
668 | |||
669 | static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) | ||
670 | { | ||
671 | nfs_set_verifier(dentry, verf); | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Whenever an NFS operation succeeds, we know that the dentry | ||
676 | * is valid, so we update the revalidation timestamp. | ||
677 | */ | ||
678 | static inline void nfs_renew_times(struct dentry * dentry) | ||
679 | { | ||
680 | dentry->d_time = jiffies; | ||
681 | } | ||
682 | |||
683 | /* | 658 | /* |
684 | * Return the intent data that applies to this particular path component | 659 | * Return the intent data that applies to this particular path component |
685 | * | 660 | * |
@@ -695,6 +670,19 @@ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigne | |||
695 | } | 670 | } |
696 | 671 | ||
697 | /* | 672 | /* |
673 | * Use intent information to check whether or not we're going to do | ||
674 | * an O_EXCL create using this path component. | ||
675 | */ | ||
676 | static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) | ||
677 | { | ||
678 | if (NFS_PROTO(dir)->version == 2) | ||
679 | return 0; | ||
680 | if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) | ||
681 | return 0; | ||
682 | return (nd->intent.open.flags & O_EXCL) != 0; | ||
683 | } | ||
684 | |||
685 | /* | ||
698 | * Inode and filehandle revalidation for lookups. | 686 | * Inode and filehandle revalidation for lookups. |
699 | * | 687 | * |
700 | * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, | 688 | * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, |
@@ -717,6 +705,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) | |||
717 | (S_ISREG(inode->i_mode) || | 705 | (S_ISREG(inode->i_mode) || |
718 | S_ISDIR(inode->i_mode))) | 706 | S_ISDIR(inode->i_mode))) |
719 | goto out_force; | 707 | goto out_force; |
708 | return 0; | ||
720 | } | 709 | } |
721 | return nfs_revalidate_inode(server, inode); | 710 | return nfs_revalidate_inode(server, inode); |
722 | out_force: | 711 | out_force: |
@@ -759,7 +748,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
759 | int error; | 748 | int error; |
760 | struct nfs_fh fhandle; | 749 | struct nfs_fh fhandle; |
761 | struct nfs_fattr fattr; | 750 | struct nfs_fattr fattr; |
762 | unsigned long verifier; | ||
763 | 751 | ||
764 | parent = dget_parent(dentry); | 752 | parent = dget_parent(dentry); |
765 | lock_kernel(); | 753 | lock_kernel(); |
@@ -767,10 +755,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
767 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); | 755 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); |
768 | inode = dentry->d_inode; | 756 | inode = dentry->d_inode; |
769 | 757 | ||
770 | /* Revalidate parent directory attribute cache */ | ||
771 | if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) | ||
772 | goto out_zap_parent; | ||
773 | |||
774 | if (!inode) { | 758 | if (!inode) { |
775 | if (nfs_neg_need_reval(dir, dentry, nd)) | 759 | if (nfs_neg_need_reval(dir, dentry, nd)) |
776 | goto out_bad; | 760 | goto out_bad; |
@@ -785,7 +769,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
785 | } | 769 | } |
786 | 770 | ||
787 | /* Force a full look up iff the parent directory has changed */ | 771 | /* Force a full look up iff the parent directory has changed */ |
788 | if (nfs_check_verifier(dir, dentry)) { | 772 | if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { |
789 | if (nfs_lookup_verify_inode(inode, nd)) | 773 | if (nfs_lookup_verify_inode(inode, nd)) |
790 | goto out_zap_parent; | 774 | goto out_zap_parent; |
791 | goto out_valid; | 775 | goto out_valid; |
@@ -794,7 +778,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
794 | if (NFS_STALE(inode)) | 778 | if (NFS_STALE(inode)) |
795 | goto out_bad; | 779 | goto out_bad; |
796 | 780 | ||
797 | verifier = nfs_save_change_attribute(dir); | ||
798 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); | 781 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); |
799 | if (error) | 782 | if (error) |
800 | goto out_bad; | 783 | goto out_bad; |
@@ -803,8 +786,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
803 | if ((error = nfs_refresh_inode(inode, &fattr)) != 0) | 786 | if ((error = nfs_refresh_inode(inode, &fattr)) != 0) |
804 | goto out_bad; | 787 | goto out_bad; |
805 | 788 | ||
806 | nfs_renew_times(dentry); | 789 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
807 | nfs_refresh_verifier(dentry, verifier); | ||
808 | out_valid: | 790 | out_valid: |
809 | unlock_kernel(); | 791 | unlock_kernel(); |
810 | dput(parent); | 792 | dput(parent); |
@@ -815,7 +797,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
815 | out_zap_parent: | 797 | out_zap_parent: |
816 | nfs_zap_caches(dir); | 798 | nfs_zap_caches(dir); |
817 | out_bad: | 799 | out_bad: |
818 | NFS_CACHEINV(dir); | 800 | nfs_mark_for_revalidate(dir); |
819 | if (inode && S_ISDIR(inode->i_mode)) { | 801 | if (inode && S_ISDIR(inode->i_mode)) { |
820 | /* Purge readdir caches. */ | 802 | /* Purge readdir caches. */ |
821 | nfs_zap_caches(inode); | 803 | nfs_zap_caches(inode); |
@@ -872,8 +854,6 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
872 | nfs_complete_unlink(dentry, inode); | 854 | nfs_complete_unlink(dentry, inode); |
873 | unlock_kernel(); | 855 | unlock_kernel(); |
874 | } | 856 | } |
875 | /* When creating a negative dentry, we want to renew d_time */ | ||
876 | nfs_renew_times(dentry); | ||
877 | iput(inode); | 857 | iput(inode); |
878 | } | 858 | } |
879 | 859 | ||
@@ -883,30 +863,6 @@ struct dentry_operations nfs_dentry_operations = { | |||
883 | .d_iput = nfs_dentry_iput, | 863 | .d_iput = nfs_dentry_iput, |
884 | }; | 864 | }; |
885 | 865 | ||
886 | /* | ||
887 | * Use intent information to check whether or not we're going to do | ||
888 | * an O_EXCL create using this path component. | ||
889 | */ | ||
890 | static inline | ||
891 | int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) | ||
892 | { | ||
893 | if (NFS_PROTO(dir)->version == 2) | ||
894 | return 0; | ||
895 | if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) | ||
896 | return 0; | ||
897 | return (nd->intent.open.flags & O_EXCL) != 0; | ||
898 | } | ||
899 | |||
900 | static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) | ||
901 | { | ||
902 | struct nfs_server *server = NFS_SERVER(dir); | ||
903 | |||
904 | if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) | ||
905 | /* Revalidate fsid using the parent directory */ | ||
906 | return __nfs_revalidate_inode(server, dir); | ||
907 | return 0; | ||
908 | } | ||
909 | |||
910 | static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 866 | static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
911 | { | 867 | { |
912 | struct dentry *res; | 868 | struct dentry *res; |
@@ -945,11 +901,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
945 | res = ERR_PTR(error); | 901 | res = ERR_PTR(error); |
946 | goto out_unlock; | 902 | goto out_unlock; |
947 | } | 903 | } |
948 | error = nfs_reval_fsid(dir, &fattr); | ||
949 | if (error < 0) { | ||
950 | res = ERR_PTR(error); | ||
951 | goto out_unlock; | ||
952 | } | ||
953 | inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); | 904 | inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); |
954 | res = (struct dentry *)inode; | 905 | res = (struct dentry *)inode; |
955 | if (IS_ERR(res)) | 906 | if (IS_ERR(res)) |
@@ -958,17 +909,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
958 | no_entry: | 909 | no_entry: |
959 | res = d_materialise_unique(dentry, inode); | 910 | res = d_materialise_unique(dentry, inode); |
960 | if (res != NULL) { | 911 | if (res != NULL) { |
961 | struct dentry *parent; | ||
962 | if (IS_ERR(res)) | 912 | if (IS_ERR(res)) |
963 | goto out_unlock; | 913 | goto out_unlock; |
964 | /* Was a directory renamed! */ | ||
965 | parent = dget_parent(res); | ||
966 | if (!IS_ROOT(parent)) | ||
967 | nfs_mark_for_revalidate(parent->d_inode); | ||
968 | dput(parent); | ||
969 | dentry = res; | 914 | dentry = res; |
970 | } | 915 | } |
971 | nfs_renew_times(dentry); | ||
972 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 916 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
973 | out_unlock: | 917 | out_unlock: |
974 | unlock_kernel(); | 918 | unlock_kernel(); |
@@ -1020,28 +964,16 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1020 | } | 964 | } |
1021 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; | 965 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; |
1022 | 966 | ||
1023 | /* Let vfs_create() deal with O_EXCL */ | 967 | /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash |
968 | * the dentry. */ | ||
1024 | if (nd->intent.open.flags & O_EXCL) { | 969 | if (nd->intent.open.flags & O_EXCL) { |
1025 | d_add(dentry, NULL); | 970 | d_instantiate(dentry, NULL); |
1026 | goto out; | 971 | goto out; |
1027 | } | 972 | } |
1028 | 973 | ||
1029 | /* Open the file on the server */ | 974 | /* Open the file on the server */ |
1030 | lock_kernel(); | 975 | lock_kernel(); |
1031 | /* Revalidate parent directory attribute cache */ | 976 | res = nfs4_atomic_open(dir, dentry, nd); |
1032 | error = nfs_revalidate_inode(NFS_SERVER(dir), dir); | ||
1033 | if (error < 0) { | ||
1034 | res = ERR_PTR(error); | ||
1035 | unlock_kernel(); | ||
1036 | goto out; | ||
1037 | } | ||
1038 | |||
1039 | if (nd->intent.open.flags & O_CREAT) { | ||
1040 | nfs_begin_data_update(dir); | ||
1041 | res = nfs4_atomic_open(dir, dentry, nd); | ||
1042 | nfs_end_data_update(dir); | ||
1043 | } else | ||
1044 | res = nfs4_atomic_open(dir, dentry, nd); | ||
1045 | unlock_kernel(); | 977 | unlock_kernel(); |
1046 | if (IS_ERR(res)) { | 978 | if (IS_ERR(res)) { |
1047 | error = PTR_ERR(res); | 979 | error = PTR_ERR(res); |
@@ -1063,8 +995,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1063 | } | 995 | } |
1064 | } else if (res != NULL) | 996 | } else if (res != NULL) |
1065 | dentry = res; | 997 | dentry = res; |
1066 | nfs_renew_times(dentry); | ||
1067 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1068 | out: | 998 | out: |
1069 | return res; | 999 | return res; |
1070 | no_open: | 1000 | no_open: |
@@ -1076,7 +1006,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1076 | struct dentry *parent = NULL; | 1006 | struct dentry *parent = NULL; |
1077 | struct inode *inode = dentry->d_inode; | 1007 | struct inode *inode = dentry->d_inode; |
1078 | struct inode *dir; | 1008 | struct inode *dir; |
1079 | unsigned long verifier; | ||
1080 | int openflags, ret = 0; | 1009 | int openflags, ret = 0; |
1081 | 1010 | ||
1082 | parent = dget_parent(dentry); | 1011 | parent = dget_parent(dentry); |
@@ -1086,8 +1015,12 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1086 | /* We can't create new files in nfs_open_revalidate(), so we | 1015 | /* We can't create new files in nfs_open_revalidate(), so we |
1087 | * optimize away revalidation of negative dentries. | 1016 | * optimize away revalidation of negative dentries. |
1088 | */ | 1017 | */ |
1089 | if (inode == NULL) | 1018 | if (inode == NULL) { |
1019 | if (!nfs_neg_need_reval(dir, dentry, nd)) | ||
1020 | ret = 1; | ||
1090 | goto out; | 1021 | goto out; |
1022 | } | ||
1023 | |||
1091 | /* NFS only supports OPEN on regular files */ | 1024 | /* NFS only supports OPEN on regular files */ |
1092 | if (!S_ISREG(inode->i_mode)) | 1025 | if (!S_ISREG(inode->i_mode)) |
1093 | goto no_open; | 1026 | goto no_open; |
@@ -1104,10 +1037,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1104 | * change attribute *before* we do the RPC call. | 1037 | * change attribute *before* we do the RPC call. |
1105 | */ | 1038 | */ |
1106 | lock_kernel(); | 1039 | lock_kernel(); |
1107 | verifier = nfs_save_change_attribute(dir); | ||
1108 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); | 1040 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); |
1109 | if (!ret) | ||
1110 | nfs_refresh_verifier(dentry, verifier); | ||
1111 | unlock_kernel(); | 1041 | unlock_kernel(); |
1112 | out: | 1042 | out: |
1113 | dput(parent); | 1043 | dput(parent); |
@@ -1133,6 +1063,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) | |||
1133 | .len = entry->len, | 1063 | .len = entry->len, |
1134 | }; | 1064 | }; |
1135 | struct inode *inode; | 1065 | struct inode *inode; |
1066 | unsigned long verf = nfs_save_change_attribute(dir); | ||
1136 | 1067 | ||
1137 | switch (name.len) { | 1068 | switch (name.len) { |
1138 | case 2: | 1069 | case 2: |
@@ -1143,6 +1074,14 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) | |||
1143 | if (name.name[0] == '.') | 1074 | if (name.name[0] == '.') |
1144 | return dget(parent); | 1075 | return dget(parent); |
1145 | } | 1076 | } |
1077 | |||
1078 | spin_lock(&dir->i_lock); | ||
1079 | if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { | ||
1080 | spin_unlock(&dir->i_lock); | ||
1081 | return NULL; | ||
1082 | } | ||
1083 | spin_unlock(&dir->i_lock); | ||
1084 | |||
1146 | name.hash = full_name_hash(name.name, name.len); | 1085 | name.hash = full_name_hash(name.name, name.len); |
1147 | dentry = d_lookup(parent, &name); | 1086 | dentry = d_lookup(parent, &name); |
1148 | if (dentry != NULL) { | 1087 | if (dentry != NULL) { |
@@ -1183,12 +1122,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) | |||
1183 | dentry = alias; | 1122 | dentry = alias; |
1184 | } | 1123 | } |
1185 | 1124 | ||
1186 | nfs_renew_times(dentry); | ||
1187 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1188 | return dentry; | ||
1189 | out_renew: | 1125 | out_renew: |
1190 | nfs_renew_times(dentry); | 1126 | nfs_set_verifier(dentry, verf); |
1191 | nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1192 | return dentry; | 1127 | return dentry; |
1193 | } | 1128 | } |
1194 | 1129 | ||
@@ -1198,32 +1133,40 @@ out_renew: | |||
1198 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | 1133 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, |
1199 | struct nfs_fattr *fattr) | 1134 | struct nfs_fattr *fattr) |
1200 | { | 1135 | { |
1136 | struct dentry *parent = dget_parent(dentry); | ||
1137 | struct inode *dir = parent->d_inode; | ||
1201 | struct inode *inode; | 1138 | struct inode *inode; |
1202 | int error = -EACCES; | 1139 | int error = -EACCES; |
1203 | 1140 | ||
1141 | d_drop(dentry); | ||
1142 | |||
1204 | /* We may have been initialized further down */ | 1143 | /* We may have been initialized further down */ |
1205 | if (dentry->d_inode) | 1144 | if (dentry->d_inode) |
1206 | return 0; | 1145 | goto out; |
1207 | if (fhandle->size == 0) { | 1146 | if (fhandle->size == 0) { |
1208 | struct inode *dir = dentry->d_parent->d_inode; | ||
1209 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1147 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); |
1210 | if (error) | 1148 | if (error) |
1211 | return error; | 1149 | goto out_error; |
1212 | } | 1150 | } |
1151 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1213 | if (!(fattr->valid & NFS_ATTR_FATTR)) { | 1152 | if (!(fattr->valid & NFS_ATTR_FATTR)) { |
1214 | struct nfs_server *server = NFS_SB(dentry->d_sb); | 1153 | struct nfs_server *server = NFS_SB(dentry->d_sb); |
1215 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); | 1154 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); |
1216 | if (error < 0) | 1155 | if (error < 0) |
1217 | return error; | 1156 | goto out_error; |
1218 | } | 1157 | } |
1219 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); | 1158 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); |
1220 | error = PTR_ERR(inode); | 1159 | error = PTR_ERR(inode); |
1221 | if (IS_ERR(inode)) | 1160 | if (IS_ERR(inode)) |
1222 | return error; | 1161 | goto out_error; |
1223 | d_instantiate(dentry, inode); | 1162 | d_add(dentry, inode); |
1224 | if (d_unhashed(dentry)) | 1163 | out: |
1225 | d_rehash(dentry); | 1164 | dput(parent); |
1226 | return 0; | 1165 | return 0; |
1166 | out_error: | ||
1167 | nfs_mark_for_revalidate(dir); | ||
1168 | dput(parent); | ||
1169 | return error; | ||
1227 | } | 1170 | } |
1228 | 1171 | ||
1229 | /* | 1172 | /* |
@@ -1249,13 +1192,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1249 | open_flags = nd->intent.open.flags; | 1192 | open_flags = nd->intent.open.flags; |
1250 | 1193 | ||
1251 | lock_kernel(); | 1194 | lock_kernel(); |
1252 | nfs_begin_data_update(dir); | ||
1253 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); | 1195 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); |
1254 | nfs_end_data_update(dir); | ||
1255 | if (error != 0) | 1196 | if (error != 0) |
1256 | goto out_err; | 1197 | goto out_err; |
1257 | nfs_renew_times(dentry); | ||
1258 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1259 | unlock_kernel(); | 1198 | unlock_kernel(); |
1260 | return 0; | 1199 | return 0; |
1261 | out_err: | 1200 | out_err: |
@@ -1283,13 +1222,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | |||
1283 | attr.ia_valid = ATTR_MODE; | 1222 | attr.ia_valid = ATTR_MODE; |
1284 | 1223 | ||
1285 | lock_kernel(); | 1224 | lock_kernel(); |
1286 | nfs_begin_data_update(dir); | ||
1287 | status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); | 1225 | status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); |
1288 | nfs_end_data_update(dir); | ||
1289 | if (status != 0) | 1226 | if (status != 0) |
1290 | goto out_err; | 1227 | goto out_err; |
1291 | nfs_renew_times(dentry); | ||
1292 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1293 | unlock_kernel(); | 1228 | unlock_kernel(); |
1294 | return 0; | 1229 | return 0; |
1295 | out_err: | 1230 | out_err: |
@@ -1313,13 +1248,9 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1313 | attr.ia_mode = mode | S_IFDIR; | 1248 | attr.ia_mode = mode | S_IFDIR; |
1314 | 1249 | ||
1315 | lock_kernel(); | 1250 | lock_kernel(); |
1316 | nfs_begin_data_update(dir); | ||
1317 | error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); | 1251 | error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); |
1318 | nfs_end_data_update(dir); | ||
1319 | if (error != 0) | 1252 | if (error != 0) |
1320 | goto out_err; | 1253 | goto out_err; |
1321 | nfs_renew_times(dentry); | ||
1322 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1323 | unlock_kernel(); | 1254 | unlock_kernel(); |
1324 | return 0; | 1255 | return 0; |
1325 | out_err: | 1256 | out_err: |
@@ -1336,12 +1267,10 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1336 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1267 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
1337 | 1268 | ||
1338 | lock_kernel(); | 1269 | lock_kernel(); |
1339 | nfs_begin_data_update(dir); | ||
1340 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | 1270 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); |
1341 | /* Ensure the VFS deletes this inode */ | 1271 | /* Ensure the VFS deletes this inode */ |
1342 | if (error == 0 && dentry->d_inode != NULL) | 1272 | if (error == 0 && dentry->d_inode != NULL) |
1343 | clear_nlink(dentry->d_inode); | 1273 | clear_nlink(dentry->d_inode); |
1344 | nfs_end_data_update(dir); | ||
1345 | unlock_kernel(); | 1274 | unlock_kernel(); |
1346 | 1275 | ||
1347 | return error; | 1276 | return error; |
@@ -1350,9 +1279,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1350 | static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) | 1279 | static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) |
1351 | { | 1280 | { |
1352 | static unsigned int sillycounter; | 1281 | static unsigned int sillycounter; |
1353 | const int i_inosize = sizeof(dir->i_ino)*2; | 1282 | const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; |
1354 | const int countersize = sizeof(sillycounter)*2; | 1283 | const int countersize = sizeof(sillycounter)*2; |
1355 | const int slen = sizeof(".nfs") + i_inosize + countersize - 1; | 1284 | const int slen = sizeof(".nfs")+fileidsize+countersize-1; |
1356 | char silly[slen+1]; | 1285 | char silly[slen+1]; |
1357 | struct qstr qsilly; | 1286 | struct qstr qsilly; |
1358 | struct dentry *sdentry; | 1287 | struct dentry *sdentry; |
@@ -1370,8 +1299,9 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
1370 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) | 1299 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) |
1371 | goto out; | 1300 | goto out; |
1372 | 1301 | ||
1373 | sprintf(silly, ".nfs%*.*lx", | 1302 | sprintf(silly, ".nfs%*.*Lx", |
1374 | i_inosize, i_inosize, dentry->d_inode->i_ino); | 1303 | fileidsize, fileidsize, |
1304 | (unsigned long long)NFS_FILEID(dentry->d_inode)); | ||
1375 | 1305 | ||
1376 | /* Return delegation in anticipation of the rename */ | 1306 | /* Return delegation in anticipation of the rename */ |
1377 | nfs_inode_return_delegation(dentry->d_inode); | 1307 | nfs_inode_return_delegation(dentry->d_inode); |
@@ -1398,19 +1328,14 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
1398 | 1328 | ||
1399 | qsilly.name = silly; | 1329 | qsilly.name = silly; |
1400 | qsilly.len = strlen(silly); | 1330 | qsilly.len = strlen(silly); |
1401 | nfs_begin_data_update(dir); | ||
1402 | if (dentry->d_inode) { | 1331 | if (dentry->d_inode) { |
1403 | nfs_begin_data_update(dentry->d_inode); | ||
1404 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, | 1332 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, |
1405 | dir, &qsilly); | 1333 | dir, &qsilly); |
1406 | nfs_mark_for_revalidate(dentry->d_inode); | 1334 | nfs_mark_for_revalidate(dentry->d_inode); |
1407 | nfs_end_data_update(dentry->d_inode); | ||
1408 | } else | 1335 | } else |
1409 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, | 1336 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, |
1410 | dir, &qsilly); | 1337 | dir, &qsilly); |
1411 | nfs_end_data_update(dir); | ||
1412 | if (!error) { | 1338 | if (!error) { |
1413 | nfs_renew_times(dentry); | ||
1414 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1339 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1415 | d_move(dentry, sdentry); | 1340 | d_move(dentry, sdentry); |
1416 | error = nfs_async_unlink(dir, dentry); | 1341 | error = nfs_async_unlink(dir, dentry); |
@@ -1443,19 +1368,15 @@ static int nfs_safe_remove(struct dentry *dentry) | |||
1443 | goto out; | 1368 | goto out; |
1444 | } | 1369 | } |
1445 | 1370 | ||
1446 | nfs_begin_data_update(dir); | ||
1447 | if (inode != NULL) { | 1371 | if (inode != NULL) { |
1448 | nfs_inode_return_delegation(inode); | 1372 | nfs_inode_return_delegation(inode); |
1449 | nfs_begin_data_update(inode); | ||
1450 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1373 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
1451 | /* The VFS may want to delete this inode */ | 1374 | /* The VFS may want to delete this inode */ |
1452 | if (error == 0) | 1375 | if (error == 0) |
1453 | drop_nlink(inode); | 1376 | drop_nlink(inode); |
1454 | nfs_mark_for_revalidate(inode); | 1377 | nfs_mark_for_revalidate(inode); |
1455 | nfs_end_data_update(inode); | ||
1456 | } else | 1378 | } else |
1457 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1379 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
1458 | nfs_end_data_update(dir); | ||
1459 | out: | 1380 | out: |
1460 | return error; | 1381 | return error; |
1461 | } | 1382 | } |
@@ -1493,7 +1414,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
1493 | spin_unlock(&dcache_lock); | 1414 | spin_unlock(&dcache_lock); |
1494 | error = nfs_safe_remove(dentry); | 1415 | error = nfs_safe_remove(dentry); |
1495 | if (!error) { | 1416 | if (!error) { |
1496 | nfs_renew_times(dentry); | ||
1497 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1417 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1498 | } else if (need_rehash) | 1418 | } else if (need_rehash) |
1499 | d_rehash(dentry); | 1419 | d_rehash(dentry); |
@@ -1548,9 +1468,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
1548 | memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); | 1468 | memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); |
1549 | kunmap_atomic(kaddr, KM_USER0); | 1469 | kunmap_atomic(kaddr, KM_USER0); |
1550 | 1470 | ||
1551 | nfs_begin_data_update(dir); | ||
1552 | error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); | 1471 | error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); |
1553 | nfs_end_data_update(dir); | ||
1554 | if (error != 0) { | 1472 | if (error != 0) { |
1555 | dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", | 1473 | dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", |
1556 | dir->i_sb->s_id, dir->i_ino, | 1474 | dir->i_sb->s_id, dir->i_ino, |
@@ -1590,15 +1508,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) | |||
1590 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1508 | dentry->d_parent->d_name.name, dentry->d_name.name); |
1591 | 1509 | ||
1592 | lock_kernel(); | 1510 | lock_kernel(); |
1593 | nfs_begin_data_update(dir); | 1511 | d_drop(dentry); |
1594 | nfs_begin_data_update(inode); | ||
1595 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); | 1512 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); |
1596 | if (error == 0) { | 1513 | if (error == 0) { |
1597 | atomic_inc(&inode->i_count); | 1514 | atomic_inc(&inode->i_count); |
1598 | d_instantiate(dentry, inode); | 1515 | d_add(dentry, inode); |
1599 | } | 1516 | } |
1600 | nfs_end_data_update(inode); | ||
1601 | nfs_end_data_update(dir); | ||
1602 | unlock_kernel(); | 1517 | unlock_kernel(); |
1603 | return error; | 1518 | return error; |
1604 | } | 1519 | } |
@@ -1701,22 +1616,16 @@ go_ahead: | |||
1701 | d_delete(new_dentry); | 1616 | d_delete(new_dentry); |
1702 | } | 1617 | } |
1703 | 1618 | ||
1704 | nfs_begin_data_update(old_dir); | ||
1705 | nfs_begin_data_update(new_dir); | ||
1706 | nfs_begin_data_update(old_inode); | ||
1707 | error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, | 1619 | error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, |
1708 | new_dir, &new_dentry->d_name); | 1620 | new_dir, &new_dentry->d_name); |
1709 | nfs_mark_for_revalidate(old_inode); | 1621 | nfs_mark_for_revalidate(old_inode); |
1710 | nfs_end_data_update(old_inode); | ||
1711 | nfs_end_data_update(new_dir); | ||
1712 | nfs_end_data_update(old_dir); | ||
1713 | out: | 1622 | out: |
1714 | if (rehash) | 1623 | if (rehash) |
1715 | d_rehash(rehash); | 1624 | d_rehash(rehash); |
1716 | if (!error) { | 1625 | if (!error) { |
1717 | d_move(old_dentry, new_dentry); | 1626 | d_move(old_dentry, new_dentry); |
1718 | nfs_renew_times(new_dentry); | 1627 | nfs_set_verifier(new_dentry, |
1719 | nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir)); | 1628 | nfs_save_change_attribute(new_dir)); |
1720 | } | 1629 | } |
1721 | 1630 | ||
1722 | /* new dentry created? */ | 1631 | /* new dentry created? */ |
@@ -1842,7 +1751,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st | |||
1842 | return NULL; | 1751 | return NULL; |
1843 | } | 1752 | } |
1844 | 1753 | ||
1845 | int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) | 1754 | static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) |
1846 | { | 1755 | { |
1847 | struct nfs_inode *nfsi = NFS_I(inode); | 1756 | struct nfs_inode *nfsi = NFS_I(inode); |
1848 | struct nfs_access_entry *cache; | 1757 | struct nfs_access_entry *cache; |
@@ -1854,7 +1763,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs | |||
1854 | cache = nfs_access_search_rbtree(inode, cred); | 1763 | cache = nfs_access_search_rbtree(inode, cred); |
1855 | if (cache == NULL) | 1764 | if (cache == NULL) |
1856 | goto out; | 1765 | goto out; |
1857 | if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) | 1766 | if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) |
1858 | goto out_stale; | 1767 | goto out_stale; |
1859 | res->jiffies = cache->jiffies; | 1768 | res->jiffies = cache->jiffies; |
1860 | res->cred = cache->cred; | 1769 | res->cred = cache->cred; |
@@ -1909,7 +1818,7 @@ found: | |||
1909 | nfs_access_free_entry(entry); | 1818 | nfs_access_free_entry(entry); |
1910 | } | 1819 | } |
1911 | 1820 | ||
1912 | void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) | 1821 | static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) |
1913 | { | 1822 | { |
1914 | struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); | 1823 | struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); |
1915 | if (cache == NULL) | 1824 | if (cache == NULL) |
@@ -1957,6 +1866,24 @@ out: | |||
1957 | return -EACCES; | 1866 | return -EACCES; |
1958 | } | 1867 | } |
1959 | 1868 | ||
1869 | static int nfs_open_permission_mask(int openflags) | ||
1870 | { | ||
1871 | int mask = 0; | ||
1872 | |||
1873 | if (openflags & FMODE_READ) | ||
1874 | mask |= MAY_READ; | ||
1875 | if (openflags & FMODE_WRITE) | ||
1876 | mask |= MAY_WRITE; | ||
1877 | if (openflags & FMODE_EXEC) | ||
1878 | mask |= MAY_EXEC; | ||
1879 | return mask; | ||
1880 | } | ||
1881 | |||
1882 | int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) | ||
1883 | { | ||
1884 | return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); | ||
1885 | } | ||
1886 | |||
1960 | int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) | 1887 | int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) |
1961 | { | 1888 | { |
1962 | struct rpc_cred *cred; | 1889 | struct rpc_cred *cred; |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fcf4d384610e..32fe97211eea 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size | |||
368 | return -ENOMEM; | 368 | return -ENOMEM; |
369 | 369 | ||
370 | dreq->inode = inode; | 370 | dreq->inode = inode; |
371 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); | 371 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
372 | if (!is_sync_kiocb(iocb)) | 372 | if (!is_sync_kiocb(iocb)) |
373 | dreq->iocb = iocb; | 373 | dreq->iocb = iocb; |
374 | 374 | ||
@@ -510,7 +510,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
510 | nfs_direct_write_reschedule(dreq); | 510 | nfs_direct_write_reschedule(dreq); |
511 | break; | 511 | break; |
512 | default: | 512 | default: |
513 | nfs_end_data_update(inode); | ||
514 | if (dreq->commit_data != NULL) | 513 | if (dreq->commit_data != NULL) |
515 | nfs_commit_free(dreq->commit_data); | 514 | nfs_commit_free(dreq->commit_data); |
516 | nfs_direct_free_writedata(dreq); | 515 | nfs_direct_free_writedata(dreq); |
@@ -533,7 +532,6 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) | |||
533 | 532 | ||
534 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) | 533 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) |
535 | { | 534 | { |
536 | nfs_end_data_update(inode); | ||
537 | nfs_direct_free_writedata(dreq); | 535 | nfs_direct_free_writedata(dreq); |
538 | nfs_zap_mapping(inode, inode->i_mapping); | 536 | nfs_zap_mapping(inode, inode->i_mapping); |
539 | nfs_direct_complete(dreq); | 537 | nfs_direct_complete(dreq); |
@@ -718,14 +716,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
718 | sync = FLUSH_STABLE; | 716 | sync = FLUSH_STABLE; |
719 | 717 | ||
720 | dreq->inode = inode; | 718 | dreq->inode = inode; |
721 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); | 719 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
722 | if (!is_sync_kiocb(iocb)) | 720 | if (!is_sync_kiocb(iocb)) |
723 | dreq->iocb = iocb; | 721 | dreq->iocb = iocb; |
724 | 722 | ||
725 | nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); | 723 | nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); |
726 | 724 | ||
727 | nfs_begin_data_update(inode); | ||
728 | |||
729 | rpc_clnt_sigmask(clnt, &oldset); | 725 | rpc_clnt_sigmask(clnt, &oldset); |
730 | result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); | 726 | result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); |
731 | if (!result) | 727 | if (!result) |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 579cf8a7d4a7..c664bb921425 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/system.h> | 33 | #include <asm/system.h> |
34 | 34 | ||
35 | #include "delegation.h" | 35 | #include "delegation.h" |
36 | #include "internal.h" | ||
36 | #include "iostat.h" | 37 | #include "iostat.h" |
37 | 38 | ||
38 | #define NFSDBG_FACILITY NFSDBG_FILE | 39 | #define NFSDBG_FACILITY NFSDBG_FILE |
@@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); | |||
55 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); | 56 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); |
56 | static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); | 57 | static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); |
57 | 58 | ||
59 | static struct vm_operations_struct nfs_file_vm_ops; | ||
60 | |||
58 | const struct file_operations nfs_file_operations = { | 61 | const struct file_operations nfs_file_operations = { |
59 | .llseek = nfs_file_llseek, | 62 | .llseek = nfs_file_llseek, |
60 | .read = do_sync_read, | 63 | .read = do_sync_read, |
@@ -174,13 +177,38 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
174 | } | 177 | } |
175 | 178 | ||
176 | /* | 179 | /* |
180 | * Helper for nfs_file_flush() and nfs_fsync() | ||
181 | * | ||
182 | * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to | ||
183 | * disk, but it retrieves and clears ctx->error after synching, despite | ||
184 | * the two being set at the same time in nfs_context_set_write_error(). | ||
185 | * This is because the former is used to notify the _next_ call to | ||
186 | * nfs_file_write() that a write error occured, and hence cause it to | ||
187 | * fall back to doing a synchronous write. | ||
188 | */ | ||
189 | static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) | ||
190 | { | ||
191 | int have_error, status; | ||
192 | int ret = 0; | ||
193 | |||
194 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
195 | status = nfs_wb_all(inode); | ||
196 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
197 | if (have_error) | ||
198 | ret = xchg(&ctx->error, 0); | ||
199 | if (!ret) | ||
200 | ret = status; | ||
201 | return ret; | ||
202 | } | ||
203 | |||
204 | /* | ||
177 | * Flush all dirty pages, and check for write errors. | 205 | * Flush all dirty pages, and check for write errors. |
178 | * | 206 | * |
179 | */ | 207 | */ |
180 | static int | 208 | static int |
181 | nfs_file_flush(struct file *file, fl_owner_t id) | 209 | nfs_file_flush(struct file *file, fl_owner_t id) |
182 | { | 210 | { |
183 | struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 211 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
184 | struct inode *inode = file->f_path.dentry->d_inode; | 212 | struct inode *inode = file->f_path.dentry->d_inode; |
185 | int status; | 213 | int status; |
186 | 214 | ||
@@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id) | |||
189 | if ((file->f_mode & FMODE_WRITE) == 0) | 217 | if ((file->f_mode & FMODE_WRITE) == 0) |
190 | return 0; | 218 | return 0; |
191 | nfs_inc_stats(inode, NFSIOS_VFSFLUSH); | 219 | nfs_inc_stats(inode, NFSIOS_VFSFLUSH); |
192 | lock_kernel(); | 220 | |
193 | /* Ensure that data+attribute caches are up to date after close() */ | 221 | /* Ensure that data+attribute caches are up to date after close() */ |
194 | status = nfs_wb_all(inode); | 222 | status = nfs_do_fsync(ctx, inode); |
195 | if (!status) { | 223 | if (!status) |
196 | status = ctx->error; | 224 | nfs_revalidate_inode(NFS_SERVER(inode), inode); |
197 | ctx->error = 0; | ||
198 | if (!status) | ||
199 | nfs_revalidate_inode(NFS_SERVER(inode), inode); | ||
200 | } | ||
201 | unlock_kernel(); | ||
202 | return status; | 225 | return status; |
203 | } | 226 | } |
204 | 227 | ||
@@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
257 | dentry->d_parent->d_name.name, dentry->d_name.name); | 280 | dentry->d_parent->d_name.name, dentry->d_name.name); |
258 | 281 | ||
259 | status = nfs_revalidate_mapping(inode, file->f_mapping); | 282 | status = nfs_revalidate_mapping(inode, file->f_mapping); |
260 | if (!status) | 283 | if (!status) { |
261 | status = generic_file_mmap(file, vma); | 284 | vma->vm_ops = &nfs_file_vm_ops; |
285 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
286 | file_accessed(file); | ||
287 | } | ||
262 | return status; | 288 | return status; |
263 | } | 289 | } |
264 | 290 | ||
@@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
270 | static int | 296 | static int |
271 | nfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 297 | nfs_fsync(struct file *file, struct dentry *dentry, int datasync) |
272 | { | 298 | { |
273 | struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 299 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
274 | struct inode *inode = dentry->d_inode; | 300 | struct inode *inode = dentry->d_inode; |
275 | int status; | ||
276 | 301 | ||
277 | dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); | 302 | dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); |
278 | 303 | ||
279 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 304 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
280 | lock_kernel(); | 305 | return nfs_do_fsync(ctx, inode); |
281 | status = nfs_wb_all(inode); | ||
282 | if (!status) { | ||
283 | status = ctx->error; | ||
284 | ctx->error = 0; | ||
285 | } | ||
286 | unlock_kernel(); | ||
287 | return status; | ||
288 | } | 306 | } |
289 | 307 | ||
290 | /* | 308 | /* |
@@ -333,7 +351,7 @@ static int nfs_launder_page(struct page *page) | |||
333 | const struct address_space_operations nfs_file_aops = { | 351 | const struct address_space_operations nfs_file_aops = { |
334 | .readpage = nfs_readpage, | 352 | .readpage = nfs_readpage, |
335 | .readpages = nfs_readpages, | 353 | .readpages = nfs_readpages, |
336 | .set_page_dirty = nfs_set_page_dirty, | 354 | .set_page_dirty = __set_page_dirty_nobuffers, |
337 | .writepage = nfs_writepage, | 355 | .writepage = nfs_writepage, |
338 | .writepages = nfs_writepages, | 356 | .writepages = nfs_writepages, |
339 | .prepare_write = nfs_prepare_write, | 357 | .prepare_write = nfs_prepare_write, |
@@ -346,6 +364,43 @@ const struct address_space_operations nfs_file_aops = { | |||
346 | .launder_page = nfs_launder_page, | 364 | .launder_page = nfs_launder_page, |
347 | }; | 365 | }; |
348 | 366 | ||
367 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
368 | { | ||
369 | struct file *filp = vma->vm_file; | ||
370 | unsigned pagelen; | ||
371 | int ret = -EINVAL; | ||
372 | |||
373 | lock_page(page); | ||
374 | if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) | ||
375 | goto out_unlock; | ||
376 | pagelen = nfs_page_length(page); | ||
377 | if (pagelen == 0) | ||
378 | goto out_unlock; | ||
379 | ret = nfs_prepare_write(filp, page, 0, pagelen); | ||
380 | if (!ret) | ||
381 | ret = nfs_commit_write(filp, page, 0, pagelen); | ||
382 | out_unlock: | ||
383 | unlock_page(page); | ||
384 | return ret; | ||
385 | } | ||
386 | |||
387 | static struct vm_operations_struct nfs_file_vm_ops = { | ||
388 | .fault = filemap_fault, | ||
389 | .page_mkwrite = nfs_vm_page_mkwrite, | ||
390 | }; | ||
391 | |||
392 | static int nfs_need_sync_write(struct file *filp, struct inode *inode) | ||
393 | { | ||
394 | struct nfs_open_context *ctx; | ||
395 | |||
396 | if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) | ||
397 | return 1; | ||
398 | ctx = nfs_file_open_context(filp); | ||
399 | if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) | ||
400 | return 1; | ||
401 | return 0; | ||
402 | } | ||
403 | |||
349 | static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | 404 | static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, |
350 | unsigned long nr_segs, loff_t pos) | 405 | unsigned long nr_segs, loff_t pos) |
351 | { | 406 | { |
@@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
382 | nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); | 437 | nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); |
383 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); | 438 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); |
384 | /* Return error values for O_SYNC and IS_SYNC() */ | 439 | /* Return error values for O_SYNC and IS_SYNC() */ |
385 | if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { | 440 | if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { |
386 | int err = nfs_fsync(iocb->ki_filp, dentry, 1); | 441 | int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); |
387 | if (err < 0) | 442 | if (err < 0) |
388 | result = err; | 443 | result = err; |
389 | } | 444 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 71a49c3acabd..035c769b715e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -49,6 +49,11 @@ | |||
49 | 49 | ||
50 | #define NFSDBG_FACILITY NFSDBG_VFS | 50 | #define NFSDBG_FACILITY NFSDBG_VFS |
51 | 51 | ||
52 | #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 | ||
53 | |||
54 | /* Default is to see 64-bit inode numbers */ | ||
55 | static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; | ||
56 | |||
52 | static void nfs_invalidate_inode(struct inode *); | 57 | static void nfs_invalidate_inode(struct inode *); |
53 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); | 58 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); |
54 | 59 | ||
@@ -62,6 +67,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) | |||
62 | return nfs_fileid_to_ino_t(fattr->fileid); | 67 | return nfs_fileid_to_ino_t(fattr->fileid); |
63 | } | 68 | } |
64 | 69 | ||
70 | /** | ||
71 | * nfs_compat_user_ino64 - returns the user-visible inode number | ||
72 | * @fileid: 64-bit fileid | ||
73 | * | ||
74 | * This function returns a 32-bit inode number if the boot parameter | ||
75 | * nfs.enable_ino64 is zero. | ||
76 | */ | ||
77 | u64 nfs_compat_user_ino64(u64 fileid) | ||
78 | { | ||
79 | int ino; | ||
80 | |||
81 | if (enable_ino64) | ||
82 | return fileid; | ||
83 | ino = fileid; | ||
84 | if (sizeof(ino) < sizeof(fileid)) | ||
85 | ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; | ||
86 | return ino; | ||
87 | } | ||
88 | |||
65 | int nfs_write_inode(struct inode *inode, int sync) | 89 | int nfs_write_inode(struct inode *inode, int sync) |
66 | { | 90 | { |
67 | int ret; | 91 | int ret; |
@@ -85,7 +109,6 @@ void nfs_clear_inode(struct inode *inode) | |||
85 | */ | 109 | */ |
86 | BUG_ON(nfs_have_writebacks(inode)); | 110 | BUG_ON(nfs_have_writebacks(inode)); |
87 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); | 111 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); |
88 | BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); | ||
89 | nfs_zap_acl_cache(inode); | 112 | nfs_zap_acl_cache(inode); |
90 | nfs_access_zap_cache(inode); | 113 | nfs_access_zap_cache(inode); |
91 | } | 114 | } |
@@ -118,8 +141,8 @@ static void nfs_zap_caches_locked(struct inode *inode) | |||
118 | 141 | ||
119 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 142 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
120 | 143 | ||
121 | NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); | 144 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
122 | NFS_ATTRTIMEO_UPDATE(inode) = jiffies; | 145 | nfsi->attrtimeo_timestamp = jiffies; |
123 | 146 | ||
124 | memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); | 147 | memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); |
125 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) | 148 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) |
@@ -156,6 +179,13 @@ static void nfs_zap_acl_cache(struct inode *inode) | |||
156 | spin_unlock(&inode->i_lock); | 179 | spin_unlock(&inode->i_lock); |
157 | } | 180 | } |
158 | 181 | ||
182 | void nfs_invalidate_atime(struct inode *inode) | ||
183 | { | ||
184 | spin_lock(&inode->i_lock); | ||
185 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | ||
186 | spin_unlock(&inode->i_lock); | ||
187 | } | ||
188 | |||
159 | /* | 189 | /* |
160 | * Invalidate, but do not unhash, the inode. | 190 | * Invalidate, but do not unhash, the inode. |
161 | * NB: must be called with inode->i_lock held! | 191 | * NB: must be called with inode->i_lock held! |
@@ -338,7 +368,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
338 | return 0; | 368 | return 0; |
339 | 369 | ||
340 | lock_kernel(); | 370 | lock_kernel(); |
341 | nfs_begin_data_update(inode); | ||
342 | /* Write all dirty data */ | 371 | /* Write all dirty data */ |
343 | if (S_ISREG(inode->i_mode)) { | 372 | if (S_ISREG(inode->i_mode)) { |
344 | filemap_write_and_wait(inode->i_mapping); | 373 | filemap_write_and_wait(inode->i_mapping); |
@@ -352,7 +381,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
352 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); | 381 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); |
353 | if (error == 0) | 382 | if (error == 0) |
354 | nfs_refresh_inode(inode, &fattr); | 383 | nfs_refresh_inode(inode, &fattr); |
355 | nfs_end_data_update(inode); | ||
356 | unlock_kernel(); | 384 | unlock_kernel(); |
357 | return error; | 385 | return error; |
358 | } | 386 | } |
@@ -431,7 +459,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
431 | 459 | ||
432 | /* Flush out writes to the server in order to update c/mtime */ | 460 | /* Flush out writes to the server in order to update c/mtime */ |
433 | if (S_ISREG(inode->i_mode)) | 461 | if (S_ISREG(inode->i_mode)) |
434 | nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); | 462 | nfs_wb_nocommit(inode); |
435 | 463 | ||
436 | /* | 464 | /* |
437 | * We may force a getattr if the user cares about atime. | 465 | * We may force a getattr if the user cares about atime. |
@@ -450,8 +478,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
450 | err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); | 478 | err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); |
451 | else | 479 | else |
452 | err = nfs_revalidate_inode(NFS_SERVER(inode), inode); | 480 | err = nfs_revalidate_inode(NFS_SERVER(inode), inode); |
453 | if (!err) | 481 | if (!err) { |
454 | generic_fillattr(inode, stat); | 482 | generic_fillattr(inode, stat); |
483 | stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); | ||
484 | } | ||
455 | return err; | 485 | return err; |
456 | } | 486 | } |
457 | 487 | ||
@@ -536,7 +566,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c | |||
536 | static void nfs_file_clear_open_context(struct file *filp) | 566 | static void nfs_file_clear_open_context(struct file *filp) |
537 | { | 567 | { |
538 | struct inode *inode = filp->f_path.dentry->d_inode; | 568 | struct inode *inode = filp->f_path.dentry->d_inode; |
539 | struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; | 569 | struct nfs_open_context *ctx = nfs_file_open_context(filp); |
540 | 570 | ||
541 | if (ctx) { | 571 | if (ctx) { |
542 | filp->private_data = NULL; | 572 | filp->private_data = NULL; |
@@ -598,16 +628,10 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
598 | status = nfs_wait_on_inode(inode); | 628 | status = nfs_wait_on_inode(inode); |
599 | if (status < 0) | 629 | if (status < 0) |
600 | goto out; | 630 | goto out; |
601 | if (NFS_STALE(inode)) { | 631 | |
602 | status = -ESTALE; | 632 | status = -ESTALE; |
603 | /* Do we trust the cached ESTALE? */ | 633 | if (NFS_STALE(inode)) |
604 | if (NFS_ATTRTIMEO(inode) != 0) { | 634 | goto out; |
605 | if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) { | ||
606 | /* no */ | ||
607 | } else | ||
608 | goto out; | ||
609 | } | ||
610 | } | ||
611 | 635 | ||
612 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); | 636 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); |
613 | if (status != 0) { | 637 | if (status != 0) { |
@@ -654,7 +678,7 @@ int nfs_attribute_timeout(struct inode *inode) | |||
654 | 678 | ||
655 | if (nfs_have_delegation(inode, FMODE_READ)) | 679 | if (nfs_have_delegation(inode, FMODE_READ)) |
656 | return 0; | 680 | return 0; |
657 | return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); | 681 | return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); |
658 | } | 682 | } |
659 | 683 | ||
660 | /** | 684 | /** |
@@ -683,11 +707,8 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa | |||
683 | } | 707 | } |
684 | spin_lock(&inode->i_lock); | 708 | spin_lock(&inode->i_lock); |
685 | nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; | 709 | nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; |
686 | if (S_ISDIR(inode->i_mode)) { | 710 | if (S_ISDIR(inode->i_mode)) |
687 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); | 711 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); |
688 | /* This ensures we revalidate child dentries */ | ||
689 | nfsi->cache_change_attribute = jiffies; | ||
690 | } | ||
691 | spin_unlock(&inode->i_lock); | 712 | spin_unlock(&inode->i_lock); |
692 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); | 713 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); |
693 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", | 714 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", |
@@ -756,56 +777,27 @@ out: | |||
756 | return ret; | 777 | return ret; |
757 | } | 778 | } |
758 | 779 | ||
759 | /** | ||
760 | * nfs_begin_data_update | ||
761 | * @inode - pointer to inode | ||
762 | * Declare that a set of operations will update file data on the server | ||
763 | */ | ||
764 | void nfs_begin_data_update(struct inode *inode) | ||
765 | { | ||
766 | atomic_inc(&NFS_I(inode)->data_updates); | ||
767 | } | ||
768 | |||
769 | /** | ||
770 | * nfs_end_data_update | ||
771 | * @inode - pointer to inode | ||
772 | * Declare end of the operations that will update file data | ||
773 | * This will mark the inode as immediately needing revalidation | ||
774 | * of its attribute cache. | ||
775 | */ | ||
776 | void nfs_end_data_update(struct inode *inode) | ||
777 | { | ||
778 | struct nfs_inode *nfsi = NFS_I(inode); | ||
779 | |||
780 | /* Directories: invalidate page cache */ | ||
781 | if (S_ISDIR(inode->i_mode)) { | ||
782 | spin_lock(&inode->i_lock); | ||
783 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
784 | spin_unlock(&inode->i_lock); | ||
785 | } | ||
786 | nfsi->cache_change_attribute = jiffies; | ||
787 | atomic_dec(&nfsi->data_updates); | ||
788 | } | ||
789 | |||
790 | static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 780 | static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
791 | { | 781 | { |
792 | struct nfs_inode *nfsi = NFS_I(inode); | 782 | struct nfs_inode *nfsi = NFS_I(inode); |
793 | unsigned long now = jiffies; | ||
794 | 783 | ||
784 | if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && | ||
785 | nfsi->change_attr == fattr->pre_change_attr) { | ||
786 | nfsi->change_attr = fattr->change_attr; | ||
787 | if (S_ISDIR(inode->i_mode)) | ||
788 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
789 | } | ||
795 | /* If we have atomic WCC data, we may update some attributes */ | 790 | /* If we have atomic WCC data, we may update some attributes */ |
796 | if ((fattr->valid & NFS_ATTR_WCC) != 0) { | 791 | if ((fattr->valid & NFS_ATTR_WCC) != 0) { |
797 | if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { | 792 | if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) |
798 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | 793 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
799 | nfsi->cache_change_attribute = now; | ||
800 | } | ||
801 | if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { | 794 | if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { |
802 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 795 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
803 | nfsi->cache_change_attribute = now; | 796 | if (S_ISDIR(inode->i_mode)) |
797 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
804 | } | 798 | } |
805 | if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { | 799 | if (inode->i_size == fattr->pre_size && nfsi->npages == 0) |
806 | inode->i_size = fattr->size; | 800 | inode->i_size = fattr->size; |
807 | nfsi->cache_change_attribute = now; | ||
808 | } | ||
809 | } | 801 | } |
810 | } | 802 | } |
811 | 803 | ||
@@ -822,7 +814,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
822 | { | 814 | { |
823 | struct nfs_inode *nfsi = NFS_I(inode); | 815 | struct nfs_inode *nfsi = NFS_I(inode); |
824 | loff_t cur_size, new_isize; | 816 | loff_t cur_size, new_isize; |
825 | int data_unstable; | 817 | unsigned long invalid = 0; |
826 | 818 | ||
827 | 819 | ||
828 | /* Has the inode gone and changed behind our back? */ | 820 | /* Has the inode gone and changed behind our back? */ |
@@ -831,37 +823,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
831 | return -EIO; | 823 | return -EIO; |
832 | } | 824 | } |
833 | 825 | ||
834 | /* Are we in the process of updating data on the server? */ | ||
835 | data_unstable = nfs_caches_unstable(inode); | ||
836 | |||
837 | /* Do atomic weak cache consistency updates */ | 826 | /* Do atomic weak cache consistency updates */ |
838 | nfs_wcc_update_inode(inode, fattr); | 827 | nfs_wcc_update_inode(inode, fattr); |
839 | 828 | ||
840 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 829 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
841 | nfsi->change_attr != fattr->change_attr) | 830 | nfsi->change_attr != fattr->change_attr) |
842 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 831 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
843 | 832 | ||
844 | /* Verify a few of the more important attributes */ | 833 | /* Verify a few of the more important attributes */ |
845 | if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) | 834 | if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) |
846 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 835 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
847 | 836 | ||
848 | cur_size = i_size_read(inode); | 837 | cur_size = i_size_read(inode); |
849 | new_isize = nfs_size_to_loff_t(fattr->size); | 838 | new_isize = nfs_size_to_loff_t(fattr->size); |
850 | if (cur_size != new_isize && nfsi->npages == 0) | 839 | if (cur_size != new_isize && nfsi->npages == 0) |
851 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 840 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
852 | 841 | ||
853 | /* Have any file permissions changed? */ | 842 | /* Have any file permissions changed? */ |
854 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) | 843 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) |
855 | || inode->i_uid != fattr->uid | 844 | || inode->i_uid != fattr->uid |
856 | || inode->i_gid != fattr->gid) | 845 | || inode->i_gid != fattr->gid) |
857 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; | 846 | invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; |
858 | 847 | ||
859 | /* Has the link count changed? */ | 848 | /* Has the link count changed? */ |
860 | if (inode->i_nlink != fattr->nlink) | 849 | if (inode->i_nlink != fattr->nlink) |
861 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 850 | invalid |= NFS_INO_INVALID_ATTR; |
862 | 851 | ||
863 | if (!timespec_equal(&inode->i_atime, &fattr->atime)) | 852 | if (!timespec_equal(&inode->i_atime, &fattr->atime)) |
864 | nfsi->cache_validity |= NFS_INO_INVALID_ATIME; | 853 | invalid |= NFS_INO_INVALID_ATIME; |
854 | |||
855 | if (invalid != 0) | ||
856 | nfsi->cache_validity |= invalid; | ||
857 | else | ||
858 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | ||
859 | | NFS_INO_INVALID_ATIME | ||
860 | | NFS_INO_REVAL_PAGECACHE); | ||
865 | 861 | ||
866 | nfsi->read_cache_jiffies = fattr->time_start; | 862 | nfsi->read_cache_jiffies = fattr->time_start; |
867 | return 0; | 863 | return 0; |
@@ -911,17 +907,41 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
911 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 907 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
912 | { | 908 | { |
913 | struct nfs_inode *nfsi = NFS_I(inode); | 909 | struct nfs_inode *nfsi = NFS_I(inode); |
914 | int status = 0; | ||
915 | 910 | ||
916 | spin_lock(&inode->i_lock); | 911 | spin_lock(&inode->i_lock); |
917 | if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { | 912 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
918 | nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 913 | if (S_ISDIR(inode->i_mode)) |
919 | goto out; | 914 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
920 | } | ||
921 | status = nfs_update_inode(inode, fattr); | ||
922 | out: | ||
923 | spin_unlock(&inode->i_lock); | 915 | spin_unlock(&inode->i_lock); |
924 | return status; | 916 | return nfs_refresh_inode(inode, fattr); |
917 | } | ||
918 | |||
919 | /** | ||
920 | * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache | ||
921 | * @inode - pointer to inode | ||
922 | * @fattr - updated attributes | ||
923 | * | ||
924 | * After an operation that has changed the inode metadata, mark the | ||
925 | * attribute cache as being invalid, then try to update it. Fake up | ||
926 | * weak cache consistency data, if none exist. | ||
927 | * | ||
928 | * This function is mainly designed to be used by the ->write_done() functions. | ||
929 | */ | ||
930 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) | ||
931 | { | ||
932 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | ||
933 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { | ||
934 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | ||
935 | fattr->valid |= NFS_ATTR_WCC_V4; | ||
936 | } | ||
937 | if ((fattr->valid & NFS_ATTR_FATTR) != 0 && | ||
938 | (fattr->valid & NFS_ATTR_WCC) == 0) { | ||
939 | memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); | ||
940 | memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); | ||
941 | fattr->pre_size = inode->i_size; | ||
942 | fattr->valid |= NFS_ATTR_WCC; | ||
943 | } | ||
944 | return nfs_post_op_update_inode(inode, fattr); | ||
925 | } | 945 | } |
926 | 946 | ||
927 | /* | 947 | /* |
@@ -941,9 +961,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
941 | struct nfs_server *server; | 961 | struct nfs_server *server; |
942 | struct nfs_inode *nfsi = NFS_I(inode); | 962 | struct nfs_inode *nfsi = NFS_I(inode); |
943 | loff_t cur_isize, new_isize; | 963 | loff_t cur_isize, new_isize; |
944 | unsigned int invalid = 0; | 964 | unsigned long invalid = 0; |
945 | unsigned long now = jiffies; | 965 | unsigned long now = jiffies; |
946 | int data_stable; | ||
947 | 966 | ||
948 | dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", | 967 | dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", |
949 | __FUNCTION__, inode->i_sb->s_id, inode->i_ino, | 968 | __FUNCTION__, inode->i_sb->s_id, inode->i_ino, |
@@ -968,57 +987,51 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
968 | * Update the read time so we don't revalidate too often. | 987 | * Update the read time so we don't revalidate too often. |
969 | */ | 988 | */ |
970 | nfsi->read_cache_jiffies = fattr->time_start; | 989 | nfsi->read_cache_jiffies = fattr->time_start; |
971 | nfsi->last_updated = now; | ||
972 | 990 | ||
973 | /* Fix a wraparound issue with nfsi->cache_change_attribute */ | 991 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME |
974 | if (time_before(now, nfsi->cache_change_attribute)) | 992 | | NFS_INO_REVAL_PAGECACHE); |
975 | nfsi->cache_change_attribute = now - 600*HZ; | ||
976 | |||
977 | /* Are we racing with known updates of the metadata on the server? */ | ||
978 | data_stable = nfs_verify_change_attribute(inode, fattr->time_start); | ||
979 | if (data_stable) | ||
980 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME); | ||
981 | 993 | ||
982 | /* Do atomic weak cache consistency updates */ | 994 | /* Do atomic weak cache consistency updates */ |
983 | nfs_wcc_update_inode(inode, fattr); | 995 | nfs_wcc_update_inode(inode, fattr); |
984 | 996 | ||
997 | /* More cache consistency checks */ | ||
998 | if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { | ||
999 | /* NFSv2/v3: Check if the mtime agrees */ | ||
1000 | if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { | ||
1001 | dprintk("NFS: mtime change on server for file %s/%ld\n", | ||
1002 | inode->i_sb->s_id, inode->i_ino); | ||
1003 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | ||
1004 | nfsi->cache_change_attribute = now; | ||
1005 | } | ||
1006 | /* If ctime has changed we should definitely clear access+acl caches */ | ||
1007 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | ||
1008 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | ||
1009 | } else if (nfsi->change_attr != fattr->change_attr) { | ||
1010 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | ||
1011 | inode->i_sb->s_id, inode->i_ino); | ||
1012 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | ||
1013 | nfsi->cache_change_attribute = now; | ||
1014 | } | ||
1015 | |||
985 | /* Check if our cached file size is stale */ | 1016 | /* Check if our cached file size is stale */ |
986 | new_isize = nfs_size_to_loff_t(fattr->size); | 1017 | new_isize = nfs_size_to_loff_t(fattr->size); |
987 | cur_isize = i_size_read(inode); | 1018 | cur_isize = i_size_read(inode); |
988 | if (new_isize != cur_isize) { | 1019 | if (new_isize != cur_isize) { |
989 | /* Do we perhaps have any outstanding writes? */ | 1020 | /* Do we perhaps have any outstanding writes, or has |
990 | if (nfsi->npages == 0) { | 1021 | * the file grown beyond our last write? */ |
991 | /* No, but did we race with nfs_end_data_update()? */ | 1022 | if (nfsi->npages == 0 || new_isize > cur_isize) { |
992 | if (data_stable) { | ||
993 | inode->i_size = new_isize; | ||
994 | invalid |= NFS_INO_INVALID_DATA; | ||
995 | } | ||
996 | invalid |= NFS_INO_INVALID_ATTR; | ||
997 | } else if (new_isize > cur_isize) { | ||
998 | inode->i_size = new_isize; | 1023 | inode->i_size = new_isize; |
999 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1024 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1000 | } | 1025 | } |
1001 | nfsi->cache_change_attribute = now; | ||
1002 | dprintk("NFS: isize change on server for file %s/%ld\n", | 1026 | dprintk("NFS: isize change on server for file %s/%ld\n", |
1003 | inode->i_sb->s_id, inode->i_ino); | 1027 | inode->i_sb->s_id, inode->i_ino); |
1004 | } | 1028 | } |
1005 | 1029 | ||
1006 | /* Check if the mtime agrees */ | ||
1007 | if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { | ||
1008 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | ||
1009 | dprintk("NFS: mtime change on server for file %s/%ld\n", | ||
1010 | inode->i_sb->s_id, inode->i_ino); | ||
1011 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | ||
1012 | nfsi->cache_change_attribute = now; | ||
1013 | } | ||
1014 | 1030 | ||
1015 | /* If ctime has changed we should definitely clear access+acl caches */ | 1031 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
1016 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { | 1032 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
1017 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | ||
1018 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | ||
1019 | nfsi->cache_change_attribute = now; | ||
1020 | } | ||
1021 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); | 1033 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); |
1034 | nfsi->change_attr = fattr->change_attr; | ||
1022 | 1035 | ||
1023 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || | 1036 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || |
1024 | inode->i_uid != fattr->uid || | 1037 | inode->i_uid != fattr->uid || |
@@ -1039,31 +1052,29 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1039 | inode->i_blocks = fattr->du.nfs2.blocks; | 1052 | inode->i_blocks = fattr->du.nfs2.blocks; |
1040 | } | 1053 | } |
1041 | 1054 | ||
1042 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | ||
1043 | nfsi->change_attr != fattr->change_attr) { | ||
1044 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | ||
1045 | inode->i_sb->s_id, inode->i_ino); | ||
1046 | nfsi->change_attr = fattr->change_attr; | ||
1047 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | ||
1048 | nfsi->cache_change_attribute = now; | ||
1049 | } | ||
1050 | |||
1051 | /* Update attrtimeo value if we're out of the unstable period */ | 1055 | /* Update attrtimeo value if we're out of the unstable period */ |
1052 | if (invalid & NFS_INO_INVALID_ATTR) { | 1056 | if (invalid & NFS_INO_INVALID_ATTR) { |
1053 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1057 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
1054 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1058 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
1055 | nfsi->attrtimeo_timestamp = now; | 1059 | nfsi->attrtimeo_timestamp = now; |
1056 | } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { | 1060 | nfsi->last_updated = now; |
1057 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1061 | } else { |
1058 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1062 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
1059 | nfsi->attrtimeo_timestamp = now; | 1063 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) |
1064 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | ||
1065 | nfsi->attrtimeo_timestamp = now; | ||
1066 | } | ||
1067 | /* | ||
1068 | * Avoid jiffy wraparound issues with nfsi->last_updated | ||
1069 | */ | ||
1070 | if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) | ||
1071 | nfsi->last_updated = nfsi->read_cache_jiffies; | ||
1060 | } | 1072 | } |
1073 | invalid &= ~NFS_INO_INVALID_ATTR; | ||
1061 | /* Don't invalidate the data if we were to blame */ | 1074 | /* Don't invalidate the data if we were to blame */ |
1062 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 1075 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
1063 | || S_ISLNK(inode->i_mode))) | 1076 | || S_ISLNK(inode->i_mode))) |
1064 | invalid &= ~NFS_INO_INVALID_DATA; | 1077 | invalid &= ~NFS_INO_INVALID_DATA; |
1065 | if (data_stable) | ||
1066 | invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); | ||
1067 | if (!nfs_have_delegation(inode, FMODE_READ) || | 1078 | if (!nfs_have_delegation(inode, FMODE_READ) || |
1068 | (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) | 1079 | (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) |
1069 | nfsi->cache_validity |= invalid; | 1080 | nfsi->cache_validity |= invalid; |
@@ -1152,7 +1163,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag | |||
1152 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); | 1163 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); |
1153 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); | 1164 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); |
1154 | INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); | 1165 | INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); |
1155 | atomic_set(&nfsi->data_updates, 0); | ||
1156 | nfsi->ncommit = 0; | 1166 | nfsi->ncommit = 0; |
1157 | nfsi->npages = 0; | 1167 | nfsi->npages = 0; |
1158 | nfs4_init_once(nfsi); | 1168 | nfs4_init_once(nfsi); |
@@ -1249,6 +1259,7 @@ static void __exit exit_nfs_fs(void) | |||
1249 | /* Not quite true; I just maintain it */ | 1259 | /* Not quite true; I just maintain it */ |
1250 | MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); | 1260 | MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); |
1251 | MODULE_LICENSE("GPL"); | 1261 | MODULE_LICENSE("GPL"); |
1262 | module_param(enable_ino64, bool, 0644); | ||
1252 | 1263 | ||
1253 | module_init(init_nfs_fs) | 1264 | module_init(init_nfs_fs) |
1254 | module_exit(exit_nfs_fs) | 1265 | module_exit(exit_nfs_fs) |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 76cf55d57101..f3acf48412be 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -5,8 +5,6 @@ | |||
5 | #include <linux/mount.h> | 5 | #include <linux/mount.h> |
6 | 6 | ||
7 | struct nfs_string; | 7 | struct nfs_string; |
8 | struct nfs_mount_data; | ||
9 | struct nfs4_mount_data; | ||
10 | 8 | ||
11 | /* Maximum number of readahead requests | 9 | /* Maximum number of readahead requests |
12 | * FIXME: this should really be a sysctl so that users may tune it to suit | 10 | * FIXME: this should really be a sysctl so that users may tune it to suit |
@@ -27,20 +25,50 @@ struct nfs_clone_mount { | |||
27 | rpc_authflavor_t authflavor; | 25 | rpc_authflavor_t authflavor; |
28 | }; | 26 | }; |
29 | 27 | ||
28 | /* | ||
29 | * In-kernel mount arguments | ||
30 | */ | ||
31 | struct nfs_parsed_mount_data { | ||
32 | int flags; | ||
33 | int rsize, wsize; | ||
34 | int timeo, retrans; | ||
35 | int acregmin, acregmax, | ||
36 | acdirmin, acdirmax; | ||
37 | int namlen; | ||
38 | unsigned int bsize; | ||
39 | unsigned int auth_flavor_len; | ||
40 | rpc_authflavor_t auth_flavors[1]; | ||
41 | char *client_address; | ||
42 | |||
43 | struct { | ||
44 | struct sockaddr_in address; | ||
45 | char *hostname; | ||
46 | unsigned int program; | ||
47 | unsigned int version; | ||
48 | unsigned short port; | ||
49 | int protocol; | ||
50 | } mount_server; | ||
51 | |||
52 | struct { | ||
53 | struct sockaddr_in address; | ||
54 | char *hostname; | ||
55 | char *export_path; | ||
56 | unsigned int program; | ||
57 | int protocol; | ||
58 | } nfs_server; | ||
59 | }; | ||
60 | |||
30 | /* client.c */ | 61 | /* client.c */ |
31 | extern struct rpc_program nfs_program; | 62 | extern struct rpc_program nfs_program; |
32 | 63 | ||
33 | extern void nfs_put_client(struct nfs_client *); | 64 | extern void nfs_put_client(struct nfs_client *); |
34 | extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); | 65 | extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); |
35 | extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, | 66 | extern struct nfs_server *nfs_create_server( |
36 | struct nfs_fh *); | 67 | const struct nfs_parsed_mount_data *, |
37 | extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, | 68 | struct nfs_fh *); |
38 | const char *, | 69 | extern struct nfs_server *nfs4_create_server( |
39 | const struct sockaddr_in *, | 70 | const struct nfs_parsed_mount_data *, |
40 | const char *, | 71 | struct nfs_fh *); |
41 | const char *, | ||
42 | rpc_authflavor_t, | ||
43 | struct nfs_fh *); | ||
44 | extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, | 72 | extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, |
45 | struct nfs_fh *); | 73 | struct nfs_fh *); |
46 | extern void nfs_free_server(struct nfs_server *server); | 74 | extern void nfs_free_server(struct nfs_server *server); |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c5fce7567200..668ab96c7b59 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -251,6 +251,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) | |||
251 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; | 251 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; |
252 | xdr_inline_pages(&req->rq_rcv_buf, replen, | 252 | xdr_inline_pages(&req->rq_rcv_buf, replen, |
253 | args->pages, args->pgbase, count); | 253 | args->pages, args->pgbase, count); |
254 | req->rq_rcv_buf.flags |= XDRBUF_READ; | ||
254 | return 0; | 255 | return 0; |
255 | } | 256 | } |
256 | 257 | ||
@@ -271,7 +272,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
271 | res->eof = 0; | 272 | res->eof = 0; |
272 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 273 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
273 | if (iov->iov_len < hdrlen) { | 274 | if (iov->iov_len < hdrlen) { |
274 | printk(KERN_WARNING "NFS: READ reply header overflowed:" | 275 | dprintk("NFS: READ reply header overflowed:" |
275 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 276 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
276 | return -errno_NFSERR_IO; | 277 | return -errno_NFSERR_IO; |
277 | } else if (iov->iov_len != hdrlen) { | 278 | } else if (iov->iov_len != hdrlen) { |
@@ -281,7 +282,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
281 | 282 | ||
282 | recvd = req->rq_rcv_buf.len - hdrlen; | 283 | recvd = req->rq_rcv_buf.len - hdrlen; |
283 | if (count > recvd) { | 284 | if (count > recvd) { |
284 | printk(KERN_WARNING "NFS: server cheating in read reply: " | 285 | dprintk("NFS: server cheating in read reply: " |
285 | "count %d > recvd %d\n", count, recvd); | 286 | "count %d > recvd %d\n", count, recvd); |
286 | count = recvd; | 287 | count = recvd; |
287 | } | 288 | } |
@@ -313,6 +314,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) | |||
313 | 314 | ||
314 | /* Copy the page array */ | 315 | /* Copy the page array */ |
315 | xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); | 316 | xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); |
317 | sndbuf->flags |= XDRBUF_WRITE; | ||
316 | return 0; | 318 | return 0; |
317 | } | 319 | } |
318 | 320 | ||
@@ -431,7 +433,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
431 | 433 | ||
432 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 434 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
433 | if (iov->iov_len < hdrlen) { | 435 | if (iov->iov_len < hdrlen) { |
434 | printk(KERN_WARNING "NFS: READDIR reply header overflowed:" | 436 | dprintk("NFS: READDIR reply header overflowed:" |
435 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 437 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
436 | return -errno_NFSERR_IO; | 438 | return -errno_NFSERR_IO; |
437 | } else if (iov->iov_len != hdrlen) { | 439 | } else if (iov->iov_len != hdrlen) { |
@@ -454,7 +456,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
454 | len = ntohl(*p++); | 456 | len = ntohl(*p++); |
455 | p += XDR_QUADLEN(len) + 1; /* name plus cookie */ | 457 | p += XDR_QUADLEN(len) + 1; /* name plus cookie */ |
456 | if (len > NFS2_MAXNAMLEN) { | 458 | if (len > NFS2_MAXNAMLEN) { |
457 | printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", | 459 | dprintk("NFS: giant filename in readdir (len 0x%x)!\n", |
458 | len); | 460 | len); |
459 | goto err_unmap; | 461 | goto err_unmap; |
460 | } | 462 | } |
@@ -471,7 +473,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
471 | entry[0] = entry[1] = 0; | 473 | entry[0] = entry[1] = 0; |
472 | /* truncate listing ? */ | 474 | /* truncate listing ? */ |
473 | if (!nr) { | 475 | if (!nr) { |
474 | printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); | 476 | dprintk("NFS: readdir reply truncated!\n"); |
475 | entry[1] = 1; | 477 | entry[1] = 1; |
476 | } | 478 | } |
477 | goto out; | 479 | goto out; |
@@ -583,12 +585,12 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
583 | /* Convert length of symlink */ | 585 | /* Convert length of symlink */ |
584 | len = ntohl(*p++); | 586 | len = ntohl(*p++); |
585 | if (len >= rcvbuf->page_len || len <= 0) { | 587 | if (len >= rcvbuf->page_len || len <= 0) { |
586 | dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); | 588 | dprintk("nfs: server returned giant symlink!\n"); |
587 | return -ENAMETOOLONG; | 589 | return -ENAMETOOLONG; |
588 | } | 590 | } |
589 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 591 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
590 | if (iov->iov_len < hdrlen) { | 592 | if (iov->iov_len < hdrlen) { |
591 | printk(KERN_WARNING "NFS: READLINK reply header overflowed:" | 593 | dprintk("NFS: READLINK reply header overflowed:" |
592 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 594 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
593 | return -errno_NFSERR_IO; | 595 | return -errno_NFSERR_IO; |
594 | } else if (iov->iov_len != hdrlen) { | 596 | } else if (iov->iov_len != hdrlen) { |
@@ -597,7 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
597 | } | 599 | } |
598 | recvd = req->rq_rcv_buf.len - hdrlen; | 600 | recvd = req->rq_rcv_buf.len - hdrlen; |
599 | if (recvd < len) { | 601 | if (recvd < len) { |
600 | printk(KERN_WARNING "NFS: server cheating in readlink reply: " | 602 | dprintk("NFS: server cheating in readlink reply: " |
601 | "count %u > recvd %u\n", len, recvd); | 603 | "count %u > recvd %u\n", len, recvd); |
602 | return -EIO; | 604 | return -EIO; |
603 | } | 605 | } |
@@ -695,7 +697,7 @@ nfs_stat_to_errno(int stat) | |||
695 | if (nfs_errtbl[i].stat == stat) | 697 | if (nfs_errtbl[i].stat == stat) |
696 | return nfs_errtbl[i].errno; | 698 | return nfs_errtbl[i].errno; |
697 | } | 699 | } |
698 | printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); | 700 | dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); |
699 | return nfs_errtbl[i].errno; | 701 | return nfs_errtbl[i].errno; |
700 | } | 702 | } |
701 | 703 | ||
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7322da4d2055..9b7362565c0c 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -317,13 +317,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
317 | } | 317 | } |
318 | 318 | ||
319 | dprintk("NFS call setacl\n"); | 319 | dprintk("NFS call setacl\n"); |
320 | nfs_begin_data_update(inode); | ||
321 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 320 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
322 | status = rpc_call_sync(server->client_acl, &msg, 0); | 321 | status = rpc_call_sync(server->client_acl, &msg, 0); |
323 | spin_lock(&inode->i_lock); | 322 | spin_lock(&inode->i_lock); |
324 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; | 323 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; |
325 | spin_unlock(&inode->i_lock); | 324 | spin_unlock(&inode->i_lock); |
326 | nfs_end_data_update(inode); | ||
327 | dprintk("NFS reply setacl: %d\n", status); | 325 | dprintk("NFS reply setacl: %d\n", status); |
328 | 326 | ||
329 | /* pages may have been allocated at the xdr layer. */ | 327 | /* pages may have been allocated at the xdr layer. */ |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c7ca5d70870b..4cdc2361a669 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -166,6 +166,7 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, | |||
166 | nfs_fattr_init(&dir_attr); | 166 | nfs_fattr_init(&dir_attr); |
167 | nfs_fattr_init(fattr); | 167 | nfs_fattr_init(fattr); |
168 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 168 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
169 | nfs_refresh_inode(dir, &dir_attr); | ||
169 | if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { | 170 | if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { |
170 | msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; | 171 | msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; |
171 | msg.rpc_argp = fhandle; | 172 | msg.rpc_argp = fhandle; |
@@ -173,8 +174,6 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, | |||
173 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 174 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
174 | } | 175 | } |
175 | dprintk("NFS reply lookup: %d\n", status); | 176 | dprintk("NFS reply lookup: %d\n", status); |
176 | if (status >= 0) | ||
177 | status = nfs_refresh_inode(dir, &dir_attr); | ||
178 | return status; | 177 | return status; |
179 | } | 178 | } |
180 | 179 | ||
@@ -607,6 +606,9 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
607 | 606 | ||
608 | nfs_fattr_init(&dir_attr); | 607 | nfs_fattr_init(&dir_attr); |
609 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 608 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
609 | |||
610 | nfs_invalidate_atime(dir); | ||
611 | |||
610 | nfs_refresh_inode(dir, &dir_attr); | 612 | nfs_refresh_inode(dir, &dir_attr); |
611 | dprintk("NFS reply readdir: %d\n", status); | 613 | dprintk("NFS reply readdir: %d\n", status); |
612 | return status; | 614 | return status; |
@@ -724,9 +726,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
724 | { | 726 | { |
725 | if (nfs3_async_handle_jukebox(task, data->inode)) | 727 | if (nfs3_async_handle_jukebox(task, data->inode)) |
726 | return -EAGAIN; | 728 | return -EAGAIN; |
727 | /* Call back common NFS readpage processing */ | 729 | |
728 | if (task->tk_status >= 0) | 730 | nfs_invalidate_atime(data->inode); |
729 | nfs_refresh_inode(data->inode, &data->fattr); | 731 | nfs_refresh_inode(data->inode, &data->fattr); |
730 | return 0; | 732 | return 0; |
731 | } | 733 | } |
732 | 734 | ||
@@ -747,7 +749,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
747 | if (nfs3_async_handle_jukebox(task, data->inode)) | 749 | if (nfs3_async_handle_jukebox(task, data->inode)) |
748 | return -EAGAIN; | 750 | return -EAGAIN; |
749 | if (task->tk_status >= 0) | 751 | if (task->tk_status >= 0) |
750 | nfs_post_op_update_inode(data->inode, data->res.fattr); | 752 | nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); |
751 | return 0; | 753 | return 0; |
752 | } | 754 | } |
753 | 755 | ||
@@ -775,8 +777,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
775 | { | 777 | { |
776 | if (nfs3_async_handle_jukebox(task, data->inode)) | 778 | if (nfs3_async_handle_jukebox(task, data->inode)) |
777 | return -EAGAIN; | 779 | return -EAGAIN; |
778 | if (task->tk_status >= 0) | 780 | nfs_refresh_inode(data->inode, data->res.fattr); |
779 | nfs_post_op_update_inode(data->inode, data->res.fattr); | ||
780 | return 0; | 781 | return 0; |
781 | } | 782 | } |
782 | 783 | ||
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d9e08f0cf2a0..616d3267b7e7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -346,6 +346,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) | |||
346 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; | 346 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; |
347 | xdr_inline_pages(&req->rq_rcv_buf, replen, | 347 | xdr_inline_pages(&req->rq_rcv_buf, replen, |
348 | args->pages, args->pgbase, count); | 348 | args->pages, args->pgbase, count); |
349 | req->rq_rcv_buf.flags |= XDRBUF_READ; | ||
349 | return 0; | 350 | return 0; |
350 | } | 351 | } |
351 | 352 | ||
@@ -367,6 +368,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) | |||
367 | 368 | ||
368 | /* Copy the page array */ | 369 | /* Copy the page array */ |
369 | xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); | 370 | xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); |
371 | sndbuf->flags |= XDRBUF_WRITE; | ||
370 | return 0; | 372 | return 0; |
371 | } | 373 | } |
372 | 374 | ||
@@ -524,7 +526,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
524 | 526 | ||
525 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 527 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
526 | if (iov->iov_len < hdrlen) { | 528 | if (iov->iov_len < hdrlen) { |
527 | printk(KERN_WARNING "NFS: READDIR reply header overflowed:" | 529 | dprintk("NFS: READDIR reply header overflowed:" |
528 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 530 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
529 | return -errno_NFSERR_IO; | 531 | return -errno_NFSERR_IO; |
530 | } else if (iov->iov_len != hdrlen) { | 532 | } else if (iov->iov_len != hdrlen) { |
@@ -547,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
547 | len = ntohl(*p++); /* string length */ | 549 | len = ntohl(*p++); /* string length */ |
548 | p += XDR_QUADLEN(len) + 2; /* name + cookie */ | 550 | p += XDR_QUADLEN(len) + 2; /* name + cookie */ |
549 | if (len > NFS3_MAXNAMLEN) { | 551 | if (len > NFS3_MAXNAMLEN) { |
550 | printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", | 552 | dprintk("NFS: giant filename in readdir (len %x)!\n", |
551 | len); | 553 | len); |
552 | goto err_unmap; | 554 | goto err_unmap; |
553 | } | 555 | } |
@@ -567,7 +569,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
567 | goto short_pkt; | 569 | goto short_pkt; |
568 | len = ntohl(*p++); | 570 | len = ntohl(*p++); |
569 | if (len > NFS3_FHSIZE) { | 571 | if (len > NFS3_FHSIZE) { |
570 | printk(KERN_WARNING "NFS: giant filehandle in " | 572 | dprintk("NFS: giant filehandle in " |
571 | "readdir (len %x)!\n", len); | 573 | "readdir (len %x)!\n", len); |
572 | goto err_unmap; | 574 | goto err_unmap; |
573 | } | 575 | } |
@@ -588,7 +590,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
588 | entry[0] = entry[1] = 0; | 590 | entry[0] = entry[1] = 0; |
589 | /* truncate listing ? */ | 591 | /* truncate listing ? */ |
590 | if (!nr) { | 592 | if (!nr) { |
591 | printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); | 593 | dprintk("NFS: readdir reply truncated!\n"); |
592 | entry[1] = 1; | 594 | entry[1] = 1; |
593 | } | 595 | } |
594 | goto out; | 596 | goto out; |
@@ -826,22 +828,23 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
826 | /* Convert length of symlink */ | 828 | /* Convert length of symlink */ |
827 | len = ntohl(*p++); | 829 | len = ntohl(*p++); |
828 | if (len >= rcvbuf->page_len || len <= 0) { | 830 | if (len >= rcvbuf->page_len || len <= 0) { |
829 | dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); | 831 | dprintk("nfs: server returned giant symlink!\n"); |
830 | return -ENAMETOOLONG; | 832 | return -ENAMETOOLONG; |
831 | } | 833 | } |
832 | 834 | ||
833 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 835 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
834 | if (iov->iov_len < hdrlen) { | 836 | if (iov->iov_len < hdrlen) { |
835 | printk(KERN_WARNING "NFS: READLINK reply header overflowed:" | 837 | dprintk("NFS: READLINK reply header overflowed:" |
836 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 838 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
837 | return -errno_NFSERR_IO; | 839 | return -errno_NFSERR_IO; |
838 | } else if (iov->iov_len != hdrlen) { | 840 | } else if (iov->iov_len != hdrlen) { |
839 | dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); | 841 | dprintk("NFS: READLINK header is short. " |
842 | "iovec will be shifted.\n"); | ||
840 | xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); | 843 | xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); |
841 | } | 844 | } |
842 | recvd = req->rq_rcv_buf.len - hdrlen; | 845 | recvd = req->rq_rcv_buf.len - hdrlen; |
843 | if (recvd < len) { | 846 | if (recvd < len) { |
844 | printk(KERN_WARNING "NFS: server cheating in readlink reply: " | 847 | dprintk("NFS: server cheating in readlink reply: " |
845 | "count %u > recvd %u\n", len, recvd); | 848 | "count %u > recvd %u\n", len, recvd); |
846 | return -EIO; | 849 | return -EIO; |
847 | } | 850 | } |
@@ -876,13 +879,13 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
876 | ocount = ntohl(*p++); | 879 | ocount = ntohl(*p++); |
877 | 880 | ||
878 | if (ocount != count) { | 881 | if (ocount != count) { |
879 | printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); | 882 | dprintk("NFS: READ count doesn't match RPC opaque count.\n"); |
880 | return -errno_NFSERR_IO; | 883 | return -errno_NFSERR_IO; |
881 | } | 884 | } |
882 | 885 | ||
883 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 886 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
884 | if (iov->iov_len < hdrlen) { | 887 | if (iov->iov_len < hdrlen) { |
885 | printk(KERN_WARNING "NFS: READ reply header overflowed:" | 888 | dprintk("NFS: READ reply header overflowed:" |
886 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 889 | "length %d > %Zu\n", hdrlen, iov->iov_len); |
887 | return -errno_NFSERR_IO; | 890 | return -errno_NFSERR_IO; |
888 | } else if (iov->iov_len != hdrlen) { | 891 | } else if (iov->iov_len != hdrlen) { |
@@ -892,7 +895,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
892 | 895 | ||
893 | recvd = req->rq_rcv_buf.len - hdrlen; | 896 | recvd = req->rq_rcv_buf.len - hdrlen; |
894 | if (count > recvd) { | 897 | if (count > recvd) { |
895 | printk(KERN_WARNING "NFS: server cheating in read reply: " | 898 | dprintk("NFS: server cheating in read reply: " |
896 | "count %d > recvd %d\n", count, recvd); | 899 | "count %d > recvd %d\n", count, recvd); |
897 | count = recvd; | 900 | count = recvd; |
898 | res->eof = 0; | 901 | res->eof = 0; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b90e17555a9..cb99fd90a9ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -62,10 +62,8 @@ struct nfs4_opendata; | |||
62 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 62 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
63 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 63 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
64 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); | 64 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); |
65 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); | ||
66 | static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); | 65 | static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); |
67 | static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); | 66 | static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); |
68 | static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); | ||
69 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 67 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
70 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 68 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
71 | 69 | ||
@@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent | |||
177 | *p++ = xdr_one; /* bitmap length */ | 175 | *p++ = xdr_one; /* bitmap length */ |
178 | *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ | 176 | *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ |
179 | *p++ = htonl(8); /* attribute buffer length */ | 177 | *p++ = htonl(8); /* attribute buffer length */ |
180 | p = xdr_encode_hyper(p, dentry->d_inode->i_ino); | 178 | p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); |
181 | } | 179 | } |
182 | 180 | ||
183 | *p++ = xdr_one; /* next */ | 181 | *p++ = xdr_one; /* next */ |
@@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent | |||
189 | *p++ = xdr_one; /* bitmap length */ | 187 | *p++ = xdr_one; /* bitmap length */ |
190 | *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ | 188 | *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ |
191 | *p++ = htonl(8); /* attribute buffer length */ | 189 | *p++ = htonl(8); /* attribute buffer length */ |
192 | p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); | 190 | p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); |
193 | 191 | ||
194 | readdir->pgbase = (char *)p - (char *)start; | 192 | readdir->pgbase = (char *)p - (char *)start; |
195 | readdir->count -= readdir->pgbase; | 193 | readdir->count -= readdir->pgbase; |
@@ -211,8 +209,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | |||
211 | 209 | ||
212 | spin_lock(&dir->i_lock); | 210 | spin_lock(&dir->i_lock); |
213 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; | 211 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; |
214 | if (cinfo->before == nfsi->change_attr && cinfo->atomic) | 212 | if (!cinfo->atomic || cinfo->before != nfsi->change_attr) |
215 | nfsi->change_attr = cinfo->after; | 213 | nfsi->cache_change_attribute = jiffies; |
214 | nfsi->change_attr = cinfo->after; | ||
216 | spin_unlock(&dir->i_lock); | 215 | spin_unlock(&dir->i_lock); |
217 | } | 216 | } |
218 | 217 | ||
@@ -454,7 +453,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
454 | memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); | 453 | memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); |
455 | rcu_read_unlock(); | 454 | rcu_read_unlock(); |
456 | lock_kernel(); | 455 | lock_kernel(); |
457 | ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); | 456 | ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); |
458 | unlock_kernel(); | 457 | unlock_kernel(); |
459 | if (ret != 0) | 458 | if (ret != 0) |
460 | goto out; | 459 | goto out; |
@@ -948,36 +947,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
948 | return 0; | 947 | return 0; |
949 | } | 948 | } |
950 | 949 | ||
951 | static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) | ||
952 | { | ||
953 | struct nfs_access_entry cache; | ||
954 | int mask = 0; | ||
955 | int status; | ||
956 | |||
957 | if (openflags & FMODE_READ) | ||
958 | mask |= MAY_READ; | ||
959 | if (openflags & FMODE_WRITE) | ||
960 | mask |= MAY_WRITE; | ||
961 | if (openflags & FMODE_EXEC) | ||
962 | mask |= MAY_EXEC; | ||
963 | status = nfs_access_get_cached(inode, cred, &cache); | ||
964 | if (status == 0) | ||
965 | goto out; | ||
966 | |||
967 | /* Be clever: ask server to check for all possible rights */ | ||
968 | cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; | ||
969 | cache.cred = cred; | ||
970 | cache.jiffies = jiffies; | ||
971 | status = _nfs4_proc_access(inode, &cache); | ||
972 | if (status != 0) | ||
973 | return status; | ||
974 | nfs_access_add_cache(inode, &cache); | ||
975 | out: | ||
976 | if ((cache.mask & mask) == mask) | ||
977 | return 0; | ||
978 | return -EACCES; | ||
979 | } | ||
980 | |||
981 | static int nfs4_recover_expired_lease(struct nfs_server *server) | 950 | static int nfs4_recover_expired_lease(struct nfs_server *server) |
982 | { | 951 | { |
983 | struct nfs_client *clp = server->nfs_client; | 952 | struct nfs_client *clp = server->nfs_client; |
@@ -1381,7 +1350,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct | |||
1381 | 1350 | ||
1382 | /* If the open_intent is for execute, we have an extra check to make */ | 1351 | /* If the open_intent is for execute, we have an extra check to make */ |
1383 | if (nd->intent.open.flags & FMODE_EXEC) { | 1352 | if (nd->intent.open.flags & FMODE_EXEC) { |
1384 | ret = _nfs4_do_access(state->inode, | 1353 | ret = nfs_may_open(state->inode, |
1385 | state->owner->so_cred, | 1354 | state->owner->so_cred, |
1386 | nd->intent.open.flags); | 1355 | nd->intent.open.flags); |
1387 | if (ret < 0) | 1356 | if (ret < 0) |
@@ -1390,7 +1359,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct | |||
1390 | filp = lookup_instantiate_filp(nd, path->dentry, NULL); | 1359 | filp = lookup_instantiate_filp(nd, path->dentry, NULL); |
1391 | if (!IS_ERR(filp)) { | 1360 | if (!IS_ERR(filp)) { |
1392 | struct nfs_open_context *ctx; | 1361 | struct nfs_open_context *ctx; |
1393 | ctx = (struct nfs_open_context *)filp->private_data; | 1362 | ctx = nfs_file_open_context(filp); |
1394 | ctx->state = state; | 1363 | ctx->state = state; |
1395 | return 0; | 1364 | return 0; |
1396 | } | 1365 | } |
@@ -1428,13 +1397,16 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
1428 | state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); | 1397 | state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); |
1429 | put_rpccred(cred); | 1398 | put_rpccred(cred); |
1430 | if (IS_ERR(state)) { | 1399 | if (IS_ERR(state)) { |
1431 | if (PTR_ERR(state) == -ENOENT) | 1400 | if (PTR_ERR(state) == -ENOENT) { |
1432 | d_add(dentry, NULL); | 1401 | d_add(dentry, NULL); |
1402 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1403 | } | ||
1433 | return (struct dentry *)state; | 1404 | return (struct dentry *)state; |
1434 | } | 1405 | } |
1435 | res = d_add_unique(dentry, igrab(state->inode)); | 1406 | res = d_add_unique(dentry, igrab(state->inode)); |
1436 | if (res != NULL) | 1407 | if (res != NULL) |
1437 | path.dentry = res; | 1408 | path.dentry = res; |
1409 | nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); | ||
1438 | nfs4_intent_set_file(nd, &path, state); | 1410 | nfs4_intent_set_file(nd, &path, state); |
1439 | return res; | 1411 | return res; |
1440 | } | 1412 | } |
@@ -1468,6 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st | |||
1468 | } | 1440 | } |
1469 | } | 1441 | } |
1470 | if (state->inode == dentry->d_inode) { | 1442 | if (state->inode == dentry->d_inode) { |
1443 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1471 | nfs4_intent_set_file(nd, &path, state); | 1444 | nfs4_intent_set_file(nd, &path, state); |
1472 | return 1; | 1445 | return 1; |
1473 | } | 1446 | } |
@@ -1757,10 +1730,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh | |||
1757 | 1730 | ||
1758 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) | 1731 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) |
1759 | { | 1732 | { |
1733 | struct nfs_server *server = NFS_SERVER(inode); | ||
1734 | struct nfs_fattr fattr; | ||
1760 | struct nfs4_accessargs args = { | 1735 | struct nfs4_accessargs args = { |
1761 | .fh = NFS_FH(inode), | 1736 | .fh = NFS_FH(inode), |
1737 | .bitmask = server->attr_bitmask, | ||
1738 | }; | ||
1739 | struct nfs4_accessres res = { | ||
1740 | .server = server, | ||
1741 | .fattr = &fattr, | ||
1762 | }; | 1742 | }; |
1763 | struct nfs4_accessres res = { 0 }; | ||
1764 | struct rpc_message msg = { | 1743 | struct rpc_message msg = { |
1765 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], | 1744 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], |
1766 | .rpc_argp = &args, | 1745 | .rpc_argp = &args, |
@@ -1786,6 +1765,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
1786 | if (mode & MAY_EXEC) | 1765 | if (mode & MAY_EXEC) |
1787 | args.access |= NFS4_ACCESS_EXECUTE; | 1766 | args.access |= NFS4_ACCESS_EXECUTE; |
1788 | } | 1767 | } |
1768 | nfs_fattr_init(&fattr); | ||
1789 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); | 1769 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); |
1790 | if (!status) { | 1770 | if (!status) { |
1791 | entry->mask = 0; | 1771 | entry->mask = 0; |
@@ -1795,6 +1775,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
1795 | entry->mask |= MAY_WRITE; | 1775 | entry->mask |= MAY_WRITE; |
1796 | if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) | 1776 | if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) |
1797 | entry->mask |= MAY_EXEC; | 1777 | entry->mask |= MAY_EXEC; |
1778 | nfs_refresh_inode(inode, &fattr); | ||
1798 | } | 1779 | } |
1799 | return status; | 1780 | return status; |
1800 | } | 1781 | } |
@@ -1900,11 +1881,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
1900 | } | 1881 | } |
1901 | state = nfs4_do_open(dir, &path, flags, sattr, cred); | 1882 | state = nfs4_do_open(dir, &path, flags, sattr, cred); |
1902 | put_rpccred(cred); | 1883 | put_rpccred(cred); |
1884 | d_drop(dentry); | ||
1903 | if (IS_ERR(state)) { | 1885 | if (IS_ERR(state)) { |
1904 | status = PTR_ERR(state); | 1886 | status = PTR_ERR(state); |
1905 | goto out; | 1887 | goto out; |
1906 | } | 1888 | } |
1907 | d_instantiate(dentry, igrab(state->inode)); | 1889 | d_add(dentry, igrab(state->inode)); |
1890 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1908 | if (flags & O_EXCL) { | 1891 | if (flags & O_EXCL) { |
1909 | struct nfs_fattr fattr; | 1892 | struct nfs_fattr fattr; |
1910 | status = nfs4_do_setattr(state->inode, &fattr, sattr, state); | 1893 | status = nfs4_do_setattr(state->inode, &fattr, sattr, state); |
@@ -2218,6 +2201,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
2218 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 2201 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
2219 | if (status == 0) | 2202 | if (status == 0) |
2220 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); | 2203 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); |
2204 | |||
2205 | nfs_invalidate_atime(dir); | ||
2206 | |||
2221 | dprintk("%s: returns %d\n", __FUNCTION__, status); | 2207 | dprintk("%s: returns %d\n", __FUNCTION__, status); |
2222 | return status; | 2208 | return status; |
2223 | } | 2209 | } |
@@ -2414,6 +2400,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
2414 | rpc_restart_call(task); | 2400 | rpc_restart_call(task); |
2415 | return -EAGAIN; | 2401 | return -EAGAIN; |
2416 | } | 2402 | } |
2403 | |||
2404 | nfs_invalidate_atime(data->inode); | ||
2417 | if (task->tk_status > 0) | 2405 | if (task->tk_status > 0) |
2418 | renew_lease(server, data->timestamp); | 2406 | renew_lease(server, data->timestamp); |
2419 | return 0; | 2407 | return 0; |
@@ -2443,7 +2431,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
2443 | } | 2431 | } |
2444 | if (task->tk_status >= 0) { | 2432 | if (task->tk_status >= 0) { |
2445 | renew_lease(NFS_SERVER(inode), data->timestamp); | 2433 | renew_lease(NFS_SERVER(inode), data->timestamp); |
2446 | nfs_post_op_update_inode(inode, data->res.fattr); | 2434 | nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); |
2447 | } | 2435 | } |
2448 | return 0; | 2436 | return 0; |
2449 | } | 2437 | } |
@@ -2485,8 +2473,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
2485 | rpc_restart_call(task); | 2473 | rpc_restart_call(task); |
2486 | return -EAGAIN; | 2474 | return -EAGAIN; |
2487 | } | 2475 | } |
2488 | if (task->tk_status >= 0) | 2476 | nfs_refresh_inode(inode, data->res.fattr); |
2489 | nfs_post_op_update_inode(inode, data->res.fattr); | ||
2490 | return 0; | 2477 | return 0; |
2491 | } | 2478 | } |
2492 | 2479 | ||
@@ -3056,7 +3043,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
3056 | if (status == 0) { | 3043 | if (status == 0) { |
3057 | status = data->rpc_status; | 3044 | status = data->rpc_status; |
3058 | if (status == 0) | 3045 | if (status == 0) |
3059 | nfs_post_op_update_inode(inode, &data->fattr); | 3046 | nfs_refresh_inode(inode, &data->fattr); |
3060 | } | 3047 | } |
3061 | rpc_put_task(task); | 3048 | rpc_put_task(task); |
3062 | return status; | 3049 | return status; |
@@ -3303,7 +3290,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * | |||
3303 | status = -ENOMEM; | 3290 | status = -ENOMEM; |
3304 | if (seqid == NULL) | 3291 | if (seqid == NULL) |
3305 | goto out; | 3292 | goto out; |
3306 | task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); | 3293 | task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); |
3307 | status = PTR_ERR(task); | 3294 | status = PTR_ERR(task); |
3308 | if (IS_ERR(task)) | 3295 | if (IS_ERR(task)) |
3309 | goto out; | 3296 | goto out; |
@@ -3447,7 +3434,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
3447 | int ret; | 3434 | int ret; |
3448 | 3435 | ||
3449 | dprintk("%s: begin!\n", __FUNCTION__); | 3436 | dprintk("%s: begin!\n", __FUNCTION__); |
3450 | data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, | 3437 | data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), |
3451 | fl->fl_u.nfs4_fl.owner); | 3438 | fl->fl_u.nfs4_fl.owner); |
3452 | if (data == NULL) | 3439 | if (data == NULL) |
3453 | return -ENOMEM; | 3440 | return -ENOMEM; |
@@ -3573,7 +3560,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) | |||
3573 | int status; | 3560 | int status; |
3574 | 3561 | ||
3575 | /* verify open state */ | 3562 | /* verify open state */ |
3576 | ctx = (struct nfs_open_context *)filp->private_data; | 3563 | ctx = nfs_file_open_context(filp); |
3577 | state = ctx->state; | 3564 | state = ctx->state; |
3578 | 3565 | ||
3579 | if (request->fl_start < 0 || request->fl_end < 0) | 3566 | if (request->fl_start < 0 || request->fl_end < 0) |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e4adf8c8312..bfb36261cecb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s | |||
774 | for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { | 774 | for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { |
775 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 775 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
776 | continue; | 776 | continue; |
777 | if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) | 777 | if (nfs_file_open_context(fl->fl_file)->state != state) |
778 | continue; | 778 | continue; |
779 | status = ops->recover_lock(state, fl); | 779 | status = ops->recover_lock(state, fl); |
780 | if (status >= 0) | 780 | if (status >= 0) |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index badd73b7ca12..51dd3804866f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int); | |||
376 | decode_locku_maxsz) | 376 | decode_locku_maxsz) |
377 | #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ | 377 | #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ |
378 | encode_putfh_maxsz + \ | 378 | encode_putfh_maxsz + \ |
379 | encode_access_maxsz) | 379 | encode_access_maxsz + \ |
380 | encode_getattr_maxsz) | ||
380 | #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ | 381 | #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ |
381 | decode_putfh_maxsz + \ | 382 | decode_putfh_maxsz + \ |
382 | decode_access_maxsz) | 383 | decode_access_maxsz + \ |
384 | decode_getattr_maxsz) | ||
383 | #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ | 385 | #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ |
384 | encode_putfh_maxsz + \ | 386 | encode_putfh_maxsz + \ |
385 | encode_getattr_maxsz) | 387 | encode_getattr_maxsz) |
@@ -562,7 +564,6 @@ struct compound_hdr { | |||
562 | 564 | ||
563 | #define RESERVE_SPACE(nbytes) do { \ | 565 | #define RESERVE_SPACE(nbytes) do { \ |
564 | p = xdr_reserve_space(xdr, nbytes); \ | 566 | p = xdr_reserve_space(xdr, nbytes); \ |
565 | if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ | ||
566 | BUG_ON(!p); \ | 567 | BUG_ON(!p); \ |
567 | } while (0) | 568 | } while (0) |
568 | 569 | ||
@@ -628,8 +629,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s | |||
628 | if (iap->ia_valid & ATTR_UID) { | 629 | if (iap->ia_valid & ATTR_UID) { |
629 | owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); | 630 | owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); |
630 | if (owner_namelen < 0) { | 631 | if (owner_namelen < 0) { |
631 | printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", | 632 | dprintk("nfs: couldn't resolve uid %d to string\n", |
632 | iap->ia_uid); | 633 | iap->ia_uid); |
633 | /* XXX */ | 634 | /* XXX */ |
634 | strcpy(owner_name, "nobody"); | 635 | strcpy(owner_name, "nobody"); |
635 | owner_namelen = sizeof("nobody") - 1; | 636 | owner_namelen = sizeof("nobody") - 1; |
@@ -640,8 +641,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s | |||
640 | if (iap->ia_valid & ATTR_GID) { | 641 | if (iap->ia_valid & ATTR_GID) { |
641 | owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); | 642 | owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); |
642 | if (owner_grouplen < 0) { | 643 | if (owner_grouplen < 0) { |
643 | printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", | 644 | dprintk("nfs: couldn't resolve gid %d to string\n", |
644 | iap->ia_gid); | 645 | iap->ia_gid); |
645 | strcpy(owner_group, "nobody"); | 646 | strcpy(owner_group, "nobody"); |
646 | owner_grouplen = sizeof("nobody") - 1; | 647 | owner_grouplen = sizeof("nobody") - 1; |
647 | /* goto out; */ | 648 | /* goto out; */ |
@@ -711,7 +712,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s | |||
711 | * Now we backfill the bitmap and the attribute buffer length. | 712 | * Now we backfill the bitmap and the attribute buffer length. |
712 | */ | 713 | */ |
713 | if (len != ((char *)p - (char *)q) + 4) { | 714 | if (len != ((char *)p - (char *)q) + 4) { |
714 | printk ("encode_attr: Attr length calculation error! %u != %Zu\n", | 715 | printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", |
715 | len, ((char *)p - (char *)q) + 4); | 716 | len, ((char *)p - (char *)q) + 4); |
716 | BUG(); | 717 | BUG(); |
717 | } | 718 | } |
@@ -1376,14 +1377,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs | |||
1376 | { | 1377 | { |
1377 | struct xdr_stream xdr; | 1378 | struct xdr_stream xdr; |
1378 | struct compound_hdr hdr = { | 1379 | struct compound_hdr hdr = { |
1379 | .nops = 2, | 1380 | .nops = 3, |
1380 | }; | 1381 | }; |
1381 | int status; | 1382 | int status; |
1382 | 1383 | ||
1383 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 1384 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
1384 | encode_compound_hdr(&xdr, &hdr); | 1385 | encode_compound_hdr(&xdr, &hdr); |
1385 | if ((status = encode_putfh(&xdr, args->fh)) == 0) | 1386 | status = encode_putfh(&xdr, args->fh); |
1386 | status = encode_access(&xdr, args->access); | 1387 | if (status != 0) |
1388 | goto out; | ||
1389 | status = encode_access(&xdr, args->access); | ||
1390 | if (status != 0) | ||
1391 | goto out; | ||
1392 | status = encode_getfattr(&xdr, args->bitmask); | ||
1393 | out: | ||
1387 | return status; | 1394 | return status; |
1388 | } | 1395 | } |
1389 | 1396 | ||
@@ -1857,6 +1864,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg | |||
1857 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; | 1864 | replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; |
1858 | xdr_inline_pages(&req->rq_rcv_buf, replen, | 1865 | xdr_inline_pages(&req->rq_rcv_buf, replen, |
1859 | args->pages, args->pgbase, args->count); | 1866 | args->pages, args->pgbase, args->count); |
1867 | req->rq_rcv_buf.flags |= XDRBUF_READ; | ||
1860 | out: | 1868 | out: |
1861 | return status; | 1869 | return status; |
1862 | } | 1870 | } |
@@ -1933,6 +1941,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea | |||
1933 | status = encode_write(&xdr, args); | 1941 | status = encode_write(&xdr, args); |
1934 | if (status) | 1942 | if (status) |
1935 | goto out; | 1943 | goto out; |
1944 | req->rq_snd_buf.flags |= XDRBUF_WRITE; | ||
1936 | status = encode_getfattr(&xdr, args->bitmask); | 1945 | status = encode_getfattr(&xdr, args->bitmask); |
1937 | out: | 1946 | out: |
1938 | return status; | 1947 | return status; |
@@ -2180,9 +2189,9 @@ out: | |||
2180 | #define READ_BUF(nbytes) do { \ | 2189 | #define READ_BUF(nbytes) do { \ |
2181 | p = xdr_inline_decode(xdr, nbytes); \ | 2190 | p = xdr_inline_decode(xdr, nbytes); \ |
2182 | if (unlikely(!p)) { \ | 2191 | if (unlikely(!p)) { \ |
2183 | printk(KERN_INFO "%s: prematurely hit end of receive" \ | 2192 | dprintk("nfs: %s: prematurely hit end of receive" \ |
2184 | " buffer\n", __FUNCTION__); \ | 2193 | " buffer\n", __FUNCTION__); \ |
2185 | printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ | 2194 | dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ |
2186 | __FUNCTION__, xdr->p, nbytes, xdr->end); \ | 2195 | __FUNCTION__, xdr->p, nbytes, xdr->end); \ |
2187 | return -EIO; \ | 2196 | return -EIO; \ |
2188 | } \ | 2197 | } \ |
@@ -2223,9 +2232,8 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) | |||
2223 | READ_BUF(8); | 2232 | READ_BUF(8); |
2224 | READ32(opnum); | 2233 | READ32(opnum); |
2225 | if (opnum != expected) { | 2234 | if (opnum != expected) { |
2226 | printk(KERN_NOTICE | 2235 | dprintk("nfs: Server returned operation" |
2227 | "nfs4_decode_op_hdr: Server returned operation" | 2236 | " %d but we issued a request for %d\n", |
2228 | " %d but we issued a request for %d\n", | ||
2229 | opnum, expected); | 2237 | opnum, expected); |
2230 | return -EIO; | 2238 | return -EIO; |
2231 | } | 2239 | } |
@@ -2758,7 +2766,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
2758 | dprintk("%s: nfs_map_name_to_uid failed!\n", | 2766 | dprintk("%s: nfs_map_name_to_uid failed!\n", |
2759 | __FUNCTION__); | 2767 | __FUNCTION__); |
2760 | } else | 2768 | } else |
2761 | printk(KERN_WARNING "%s: name too long (%u)!\n", | 2769 | dprintk("%s: name too long (%u)!\n", |
2762 | __FUNCTION__, len); | 2770 | __FUNCTION__, len); |
2763 | bitmap[1] &= ~FATTR4_WORD1_OWNER; | 2771 | bitmap[1] &= ~FATTR4_WORD1_OWNER; |
2764 | } | 2772 | } |
@@ -2783,7 +2791,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
2783 | dprintk("%s: nfs_map_group_to_gid failed!\n", | 2791 | dprintk("%s: nfs_map_group_to_gid failed!\n", |
2784 | __FUNCTION__); | 2792 | __FUNCTION__); |
2785 | } else | 2793 | } else |
2786 | printk(KERN_WARNING "%s: name too long (%u)!\n", | 2794 | dprintk("%s: name too long (%u)!\n", |
2787 | __FUNCTION__, len); | 2795 | __FUNCTION__, len); |
2788 | bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; | 2796 | bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; |
2789 | } | 2797 | } |
@@ -2950,7 +2958,8 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl | |||
2950 | unsigned int nwords = xdr->p - savep; | 2958 | unsigned int nwords = xdr->p - savep; |
2951 | 2959 | ||
2952 | if (unlikely(attrwords != nwords)) { | 2960 | if (unlikely(attrwords != nwords)) { |
2953 | printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n", | 2961 | dprintk("%s: server returned incorrect attribute length: " |
2962 | "%u %c %u\n", | ||
2954 | __FUNCTION__, | 2963 | __FUNCTION__, |
2955 | attrwords << 2, | 2964 | attrwords << 2, |
2956 | (attrwords < nwords) ? '<' : '>', | 2965 | (attrwords < nwords) ? '<' : '>', |
@@ -3451,7 +3460,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ | |||
3451 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 3460 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
3452 | recvd = req->rq_rcv_buf.len - hdrlen; | 3461 | recvd = req->rq_rcv_buf.len - hdrlen; |
3453 | if (count > recvd) { | 3462 | if (count > recvd) { |
3454 | printk(KERN_WARNING "NFS: server cheating in read reply: " | 3463 | dprintk("NFS: server cheating in read reply: " |
3455 | "count %u > recvd %u\n", count, recvd); | 3464 | "count %u > recvd %u\n", count, recvd); |
3456 | count = recvd; | 3465 | count = recvd; |
3457 | eof = 0; | 3466 | eof = 0; |
@@ -3500,7 +3509,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3500 | p += 2; /* cookie */ | 3509 | p += 2; /* cookie */ |
3501 | len = ntohl(*p++); /* filename length */ | 3510 | len = ntohl(*p++); /* filename length */ |
3502 | if (len > NFS4_MAXNAMLEN) { | 3511 | if (len > NFS4_MAXNAMLEN) { |
3503 | printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); | 3512 | dprintk("NFS: giant filename in readdir (len 0x%x)\n", |
3513 | len); | ||
3504 | goto err_unmap; | 3514 | goto err_unmap; |
3505 | } | 3515 | } |
3506 | xlen = XDR_QUADLEN(len); | 3516 | xlen = XDR_QUADLEN(len); |
@@ -3528,7 +3538,7 @@ short_pkt: | |||
3528 | entry[0] = entry[1] = 0; | 3538 | entry[0] = entry[1] = 0; |
3529 | /* truncate listing ? */ | 3539 | /* truncate listing ? */ |
3530 | if (!nr) { | 3540 | if (!nr) { |
3531 | printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); | 3541 | dprintk("NFS: readdir reply truncated!\n"); |
3532 | entry[1] = 1; | 3542 | entry[1] = 1; |
3533 | } | 3543 | } |
3534 | goto out; | 3544 | goto out; |
@@ -3554,13 +3564,13 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
3554 | READ_BUF(4); | 3564 | READ_BUF(4); |
3555 | READ32(len); | 3565 | READ32(len); |
3556 | if (len >= rcvbuf->page_len || len <= 0) { | 3566 | if (len >= rcvbuf->page_len || len <= 0) { |
3557 | dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); | 3567 | dprintk("nfs: server returned giant symlink!\n"); |
3558 | return -ENAMETOOLONG; | 3568 | return -ENAMETOOLONG; |
3559 | } | 3569 | } |
3560 | hdrlen = (char *) xdr->p - (char *) iov->iov_base; | 3570 | hdrlen = (char *) xdr->p - (char *) iov->iov_base; |
3561 | recvd = req->rq_rcv_buf.len - hdrlen; | 3571 | recvd = req->rq_rcv_buf.len - hdrlen; |
3562 | if (recvd < len) { | 3572 | if (recvd < len) { |
3563 | printk(KERN_WARNING "NFS: server cheating in readlink reply: " | 3573 | dprintk("NFS: server cheating in readlink reply: " |
3564 | "count %u > recvd %u\n", len, recvd); | 3574 | "count %u > recvd %u\n", len, recvd); |
3565 | return -EIO; | 3575 | return -EIO; |
3566 | } | 3576 | } |
@@ -3643,7 +3653,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
3643 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; | 3653 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; |
3644 | recvd = req->rq_rcv_buf.len - hdrlen; | 3654 | recvd = req->rq_rcv_buf.len - hdrlen; |
3645 | if (attrlen > recvd) { | 3655 | if (attrlen > recvd) { |
3646 | printk(KERN_WARNING "NFS: server cheating in getattr" | 3656 | dprintk("NFS: server cheating in getattr" |
3647 | " acl reply: attrlen %u > recvd %u\n", | 3657 | " acl reply: attrlen %u > recvd %u\n", |
3648 | attrlen, recvd); | 3658 | attrlen, recvd); |
3649 | return -EINVAL; | 3659 | return -EINVAL; |
@@ -3688,8 +3698,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) | |||
3688 | READ_BUF(8); | 3698 | READ_BUF(8); |
3689 | READ32(opnum); | 3699 | READ32(opnum); |
3690 | if (opnum != OP_SETCLIENTID) { | 3700 | if (opnum != OP_SETCLIENTID) { |
3691 | printk(KERN_NOTICE | 3701 | dprintk("nfs: decode_setclientid: Server returned operation" |
3692 | "nfs4_decode_setclientid: Server returned operation" | ||
3693 | " %d\n", opnum); | 3702 | " %d\n", opnum); |
3694 | return -EIO; | 3703 | return -EIO; |
3695 | } | 3704 | } |
@@ -3783,8 +3792,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac | |||
3783 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | 3792 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); |
3784 | if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) | 3793 | if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) |
3785 | goto out; | 3794 | goto out; |
3786 | if ((status = decode_putfh(&xdr)) == 0) | 3795 | status = decode_putfh(&xdr); |
3787 | status = decode_access(&xdr, res); | 3796 | if (status != 0) |
3797 | goto out; | ||
3798 | status = decode_access(&xdr, res); | ||
3799 | if (status != 0) | ||
3800 | goto out; | ||
3801 | decode_getfattr(&xdr, res->fattr, res->server); | ||
3788 | out: | 3802 | out: |
3789 | return status; | 3803 | return status; |
3790 | } | 3804 | } |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 3490322d1145..e87b44ee9ac9 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #include <linux/fs.h> | 76 | #include <linux/fs.h> |
77 | #include <linux/init.h> | 77 | #include <linux/init.h> |
78 | #include <linux/sunrpc/clnt.h> | 78 | #include <linux/sunrpc/clnt.h> |
79 | #include <linux/sunrpc/xprtsock.h> | ||
79 | #include <linux/nfs.h> | 80 | #include <linux/nfs.h> |
80 | #include <linux/nfs_fs.h> | 81 | #include <linux/nfs_fs.h> |
81 | #include <linux/nfs_mount.h> | 82 | #include <linux/nfs_mount.h> |
@@ -491,7 +492,7 @@ static int __init root_nfs_get_handle(void) | |||
491 | struct sockaddr_in sin; | 492 | struct sockaddr_in sin; |
492 | int status; | 493 | int status; |
493 | int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? | 494 | int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? |
494 | IPPROTO_TCP : IPPROTO_UDP; | 495 | XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP; |
495 | int version = (nfs_data.flags & NFS_MOUNT_VER3) ? | 496 | int version = (nfs_data.flags & NFS_MOUNT_VER3) ? |
496 | NFS_MNT3_VERSION : NFS_MNT_VERSION; | 497 | NFS_MNT3_VERSION : NFS_MNT_VERSION; |
497 | 498 | ||
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 845cdde1d8b7..97669ed05500 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -476,6 +476,8 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
476 | dprintk("NFS call readdir %d\n", (unsigned int)cookie); | 476 | dprintk("NFS call readdir %d\n", (unsigned int)cookie); |
477 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 477 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
478 | 478 | ||
479 | nfs_invalidate_atime(dir); | ||
480 | |||
479 | dprintk("NFS reply readdir: %d\n", status); | 481 | dprintk("NFS reply readdir: %d\n", status); |
480 | return status; | 482 | return status; |
481 | } | 483 | } |
@@ -550,6 +552,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
550 | 552 | ||
551 | static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | 553 | static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) |
552 | { | 554 | { |
555 | nfs_invalidate_atime(data->inode); | ||
553 | if (task->tk_status >= 0) { | 556 | if (task->tk_status >= 0) { |
554 | nfs_refresh_inode(data->inode, data->res.fattr); | 557 | nfs_refresh_inode(data->inode, data->res.fattr); |
555 | /* Emulate the eof flag, which isn't normally needed in NFSv2 | 558 | /* Emulate the eof flag, which isn't normally needed in NFSv2 |
@@ -576,7 +579,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data) | |||
576 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 579 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) |
577 | { | 580 | { |
578 | if (task->tk_status >= 0) | 581 | if (task->tk_status >= 0) |
579 | nfs_post_op_update_inode(data->inode, data->res.fattr); | 582 | nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); |
580 | return 0; | 583 | return 0; |
581 | } | 584 | } |
582 | 585 | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 19e05633f4e3..4587a86adaac 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -341,9 +341,6 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | |||
341 | set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); | 341 | set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); |
342 | nfs_mark_for_revalidate(data->inode); | 342 | nfs_mark_for_revalidate(data->inode); |
343 | } | 343 | } |
344 | spin_lock(&data->inode->i_lock); | ||
345 | NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; | ||
346 | spin_unlock(&data->inode->i_lock); | ||
347 | return 0; | 344 | return 0; |
348 | } | 345 | } |
349 | 346 | ||
@@ -497,8 +494,7 @@ int nfs_readpage(struct file *file, struct page *page) | |||
497 | if (ctx == NULL) | 494 | if (ctx == NULL) |
498 | goto out_unlock; | 495 | goto out_unlock; |
499 | } else | 496 | } else |
500 | ctx = get_nfs_open_context((struct nfs_open_context *) | 497 | ctx = get_nfs_open_context(nfs_file_open_context(file)); |
501 | file->private_data); | ||
502 | 498 | ||
503 | error = nfs_readpage_async(ctx, inode, page); | 499 | error = nfs_readpage_async(ctx, inode, page); |
504 | 500 | ||
@@ -576,8 +572,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
576 | if (desc.ctx == NULL) | 572 | if (desc.ctx == NULL) |
577 | return -EBADF; | 573 | return -EBADF; |
578 | } else | 574 | } else |
579 | desc.ctx = get_nfs_open_context((struct nfs_open_context *) | 575 | desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); |
580 | filp->private_data); | ||
581 | if (rsize < PAGE_CACHE_SIZE) | 576 | if (rsize < PAGE_CACHE_SIZE) |
582 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 577 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
583 | else | 578 | else |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b878528b64c1..fa517ae9207f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/sunrpc/clnt.h> | 33 | #include <linux/sunrpc/clnt.h> |
34 | #include <linux/sunrpc/stats.h> | 34 | #include <linux/sunrpc/stats.h> |
35 | #include <linux/sunrpc/metrics.h> | 35 | #include <linux/sunrpc/metrics.h> |
36 | #include <linux/sunrpc/xprtsock.h> | ||
37 | #include <linux/sunrpc/xprtrdma.h> | ||
36 | #include <linux/nfs_fs.h> | 38 | #include <linux/nfs_fs.h> |
37 | #include <linux/nfs_mount.h> | 39 | #include <linux/nfs_mount.h> |
38 | #include <linux/nfs4_mount.h> | 40 | #include <linux/nfs4_mount.h> |
@@ -58,36 +60,6 @@ | |||
58 | 60 | ||
59 | #define NFSDBG_FACILITY NFSDBG_VFS | 61 | #define NFSDBG_FACILITY NFSDBG_VFS |
60 | 62 | ||
61 | |||
62 | struct nfs_parsed_mount_data { | ||
63 | int flags; | ||
64 | int rsize, wsize; | ||
65 | int timeo, retrans; | ||
66 | int acregmin, acregmax, | ||
67 | acdirmin, acdirmax; | ||
68 | int namlen; | ||
69 | unsigned int bsize; | ||
70 | unsigned int auth_flavor_len; | ||
71 | rpc_authflavor_t auth_flavors[1]; | ||
72 | char *client_address; | ||
73 | |||
74 | struct { | ||
75 | struct sockaddr_in address; | ||
76 | unsigned int program; | ||
77 | unsigned int version; | ||
78 | unsigned short port; | ||
79 | int protocol; | ||
80 | } mount_server; | ||
81 | |||
82 | struct { | ||
83 | struct sockaddr_in address; | ||
84 | char *hostname; | ||
85 | char *export_path; | ||
86 | unsigned int program; | ||
87 | int protocol; | ||
88 | } nfs_server; | ||
89 | }; | ||
90 | |||
91 | enum { | 63 | enum { |
92 | /* Mount options that take no arguments */ | 64 | /* Mount options that take no arguments */ |
93 | Opt_soft, Opt_hard, | 65 | Opt_soft, Opt_hard, |
@@ -97,7 +69,7 @@ enum { | |||
97 | Opt_ac, Opt_noac, | 69 | Opt_ac, Opt_noac, |
98 | Opt_lock, Opt_nolock, | 70 | Opt_lock, Opt_nolock, |
99 | Opt_v2, Opt_v3, | 71 | Opt_v2, Opt_v3, |
100 | Opt_udp, Opt_tcp, | 72 | Opt_udp, Opt_tcp, Opt_rdma, |
101 | Opt_acl, Opt_noacl, | 73 | Opt_acl, Opt_noacl, |
102 | Opt_rdirplus, Opt_nordirplus, | 74 | Opt_rdirplus, Opt_nordirplus, |
103 | Opt_sharecache, Opt_nosharecache, | 75 | Opt_sharecache, Opt_nosharecache, |
@@ -116,7 +88,7 @@ enum { | |||
116 | 88 | ||
117 | /* Mount options that take string arguments */ | 89 | /* Mount options that take string arguments */ |
118 | Opt_sec, Opt_proto, Opt_mountproto, | 90 | Opt_sec, Opt_proto, Opt_mountproto, |
119 | Opt_addr, Opt_mounthost, Opt_clientaddr, | 91 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
120 | 92 | ||
121 | /* Mount options that are ignored */ | 93 | /* Mount options that are ignored */ |
122 | Opt_userspace, Opt_deprecated, | 94 | Opt_userspace, Opt_deprecated, |
@@ -143,6 +115,7 @@ static match_table_t nfs_mount_option_tokens = { | |||
143 | { Opt_v3, "v3" }, | 115 | { Opt_v3, "v3" }, |
144 | { Opt_udp, "udp" }, | 116 | { Opt_udp, "udp" }, |
145 | { Opt_tcp, "tcp" }, | 117 | { Opt_tcp, "tcp" }, |
118 | { Opt_rdma, "rdma" }, | ||
146 | { Opt_acl, "acl" }, | 119 | { Opt_acl, "acl" }, |
147 | { Opt_noacl, "noacl" }, | 120 | { Opt_noacl, "noacl" }, |
148 | { Opt_rdirplus, "rdirplus" }, | 121 | { Opt_rdirplus, "rdirplus" }, |
@@ -175,13 +148,14 @@ static match_table_t nfs_mount_option_tokens = { | |||
175 | { Opt_mountproto, "mountproto=%s" }, | 148 | { Opt_mountproto, "mountproto=%s" }, |
176 | { Opt_addr, "addr=%s" }, | 149 | { Opt_addr, "addr=%s" }, |
177 | { Opt_clientaddr, "clientaddr=%s" }, | 150 | { Opt_clientaddr, "clientaddr=%s" }, |
178 | { Opt_mounthost, "mounthost=%s" }, | 151 | { Opt_userspace, "mounthost=%s" }, |
152 | { Opt_mountaddr, "mountaddr=%s" }, | ||
179 | 153 | ||
180 | { Opt_err, NULL } | 154 | { Opt_err, NULL } |
181 | }; | 155 | }; |
182 | 156 | ||
183 | enum { | 157 | enum { |
184 | Opt_xprt_udp, Opt_xprt_tcp, | 158 | Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma, |
185 | 159 | ||
186 | Opt_xprt_err | 160 | Opt_xprt_err |
187 | }; | 161 | }; |
@@ -189,6 +163,7 @@ enum { | |||
189 | static match_table_t nfs_xprt_protocol_tokens = { | 163 | static match_table_t nfs_xprt_protocol_tokens = { |
190 | { Opt_xprt_udp, "udp" }, | 164 | { Opt_xprt_udp, "udp" }, |
191 | { Opt_xprt_tcp, "tcp" }, | 165 | { Opt_xprt_tcp, "tcp" }, |
166 | { Opt_xprt_rdma, "rdma" }, | ||
192 | 167 | ||
193 | { Opt_xprt_err, NULL } | 168 | { Opt_xprt_err, NULL } |
194 | }; | 169 | }; |
@@ -449,7 +424,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
449 | const char *nostr; | 424 | const char *nostr; |
450 | } nfs_info[] = { | 425 | } nfs_info[] = { |
451 | { NFS_MOUNT_SOFT, ",soft", ",hard" }, | 426 | { NFS_MOUNT_SOFT, ",soft", ",hard" }, |
452 | { NFS_MOUNT_INTR, ",intr", "" }, | 427 | { NFS_MOUNT_INTR, ",intr", ",nointr" }, |
453 | { NFS_MOUNT_NOCTO, ",nocto", "" }, | 428 | { NFS_MOUNT_NOCTO, ",nocto", "" }, |
454 | { NFS_MOUNT_NOAC, ",noac", "" }, | 429 | { NFS_MOUNT_NOAC, ",noac", "" }, |
455 | { NFS_MOUNT_NONLM, ",nolock", "" }, | 430 | { NFS_MOUNT_NONLM, ",nolock", "" }, |
@@ -460,8 +435,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
460 | }; | 435 | }; |
461 | const struct proc_nfs_info *nfs_infop; | 436 | const struct proc_nfs_info *nfs_infop; |
462 | struct nfs_client *clp = nfss->nfs_client; | 437 | struct nfs_client *clp = nfss->nfs_client; |
463 | char buf[12]; | ||
464 | const char *proto; | ||
465 | 438 | ||
466 | seq_printf(m, ",vers=%d", clp->rpc_ops->version); | 439 | seq_printf(m, ",vers=%d", clp->rpc_ops->version); |
467 | seq_printf(m, ",rsize=%d", nfss->rsize); | 440 | seq_printf(m, ",rsize=%d", nfss->rsize); |
@@ -480,18 +453,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
480 | else | 453 | else |
481 | seq_puts(m, nfs_infop->nostr); | 454 | seq_puts(m, nfs_infop->nostr); |
482 | } | 455 | } |
483 | switch (nfss->client->cl_xprt->prot) { | 456 | seq_printf(m, ",proto=%s", |
484 | case IPPROTO_TCP: | 457 | rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); |
485 | proto = "tcp"; | ||
486 | break; | ||
487 | case IPPROTO_UDP: | ||
488 | proto = "udp"; | ||
489 | break; | ||
490 | default: | ||
491 | snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); | ||
492 | proto = buf; | ||
493 | } | ||
494 | seq_printf(m, ",proto=%s", proto); | ||
495 | seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); | 458 | seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); |
496 | seq_printf(m, ",retrans=%u", clp->retrans_count); | 459 | seq_printf(m, ",retrans=%u", clp->retrans_count); |
497 | seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); | 460 | seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); |
@@ -506,8 +469,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
506 | 469 | ||
507 | nfs_show_mount_options(m, nfss, 0); | 470 | nfs_show_mount_options(m, nfss, 0); |
508 | 471 | ||
509 | seq_puts(m, ",addr="); | 472 | seq_printf(m, ",addr="NIPQUAD_FMT, |
510 | seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); | 473 | NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); |
511 | 474 | ||
512 | return 0; | 475 | return 0; |
513 | } | 476 | } |
@@ -698,13 +661,19 @@ static int nfs_parse_mount_options(char *raw, | |||
698 | break; | 661 | break; |
699 | case Opt_udp: | 662 | case Opt_udp: |
700 | mnt->flags &= ~NFS_MOUNT_TCP; | 663 | mnt->flags &= ~NFS_MOUNT_TCP; |
701 | mnt->nfs_server.protocol = IPPROTO_UDP; | 664 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
702 | mnt->timeo = 7; | 665 | mnt->timeo = 7; |
703 | mnt->retrans = 5; | 666 | mnt->retrans = 5; |
704 | break; | 667 | break; |
705 | case Opt_tcp: | 668 | case Opt_tcp: |
706 | mnt->flags |= NFS_MOUNT_TCP; | 669 | mnt->flags |= NFS_MOUNT_TCP; |
707 | mnt->nfs_server.protocol = IPPROTO_TCP; | 670 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
671 | mnt->timeo = 600; | ||
672 | mnt->retrans = 2; | ||
673 | break; | ||
674 | case Opt_rdma: | ||
675 | mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ | ||
676 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; | ||
708 | mnt->timeo = 600; | 677 | mnt->timeo = 600; |
709 | mnt->retrans = 2; | 678 | mnt->retrans = 2; |
710 | break; | 679 | break; |
@@ -913,13 +882,20 @@ static int nfs_parse_mount_options(char *raw, | |||
913 | switch (token) { | 882 | switch (token) { |
914 | case Opt_xprt_udp: | 883 | case Opt_xprt_udp: |
915 | mnt->flags &= ~NFS_MOUNT_TCP; | 884 | mnt->flags &= ~NFS_MOUNT_TCP; |
916 | mnt->nfs_server.protocol = IPPROTO_UDP; | 885 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
917 | mnt->timeo = 7; | 886 | mnt->timeo = 7; |
918 | mnt->retrans = 5; | 887 | mnt->retrans = 5; |
919 | break; | 888 | break; |
920 | case Opt_xprt_tcp: | 889 | case Opt_xprt_tcp: |
921 | mnt->flags |= NFS_MOUNT_TCP; | 890 | mnt->flags |= NFS_MOUNT_TCP; |
922 | mnt->nfs_server.protocol = IPPROTO_TCP; | 891 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
892 | mnt->timeo = 600; | ||
893 | mnt->retrans = 2; | ||
894 | break; | ||
895 | case Opt_xprt_rdma: | ||
896 | /* vector side protocols to TCP */ | ||
897 | mnt->flags |= NFS_MOUNT_TCP; | ||
898 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; | ||
923 | mnt->timeo = 600; | 899 | mnt->timeo = 600; |
924 | mnt->retrans = 2; | 900 | mnt->retrans = 2; |
925 | break; | 901 | break; |
@@ -937,11 +913,12 @@ static int nfs_parse_mount_options(char *raw, | |||
937 | 913 | ||
938 | switch (token) { | 914 | switch (token) { |
939 | case Opt_xprt_udp: | 915 | case Opt_xprt_udp: |
940 | mnt->mount_server.protocol = IPPROTO_UDP; | 916 | mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; |
941 | break; | 917 | break; |
942 | case Opt_xprt_tcp: | 918 | case Opt_xprt_tcp: |
943 | mnt->mount_server.protocol = IPPROTO_TCP; | 919 | mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; |
944 | break; | 920 | break; |
921 | case Opt_xprt_rdma: /* not used for side protocols */ | ||
945 | default: | 922 | default: |
946 | goto out_unrec_xprt; | 923 | goto out_unrec_xprt; |
947 | } | 924 | } |
@@ -961,7 +938,7 @@ static int nfs_parse_mount_options(char *raw, | |||
961 | goto out_nomem; | 938 | goto out_nomem; |
962 | mnt->client_address = string; | 939 | mnt->client_address = string; |
963 | break; | 940 | break; |
964 | case Opt_mounthost: | 941 | case Opt_mountaddr: |
965 | string = match_strdup(args); | 942 | string = match_strdup(args); |
966 | if (string == NULL) | 943 | if (string == NULL) |
967 | goto out_nomem; | 944 | goto out_nomem; |
@@ -1027,16 +1004,10 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1027 | sin = args->mount_server.address; | 1004 | sin = args->mount_server.address; |
1028 | else | 1005 | else |
1029 | sin = args->nfs_server.address; | 1006 | sin = args->nfs_server.address; |
1030 | if (args->mount_server.port == 0) { | 1007 | /* |
1031 | status = rpcb_getport_sync(&sin, | 1008 | * autobind will be used if mount_server.port == 0 |
1032 | args->mount_server.program, | 1009 | */ |
1033 | args->mount_server.version, | 1010 | sin.sin_port = htons(args->mount_server.port); |
1034 | args->mount_server.protocol); | ||
1035 | if (status < 0) | ||
1036 | goto out_err; | ||
1037 | sin.sin_port = htons(status); | ||
1038 | } else | ||
1039 | sin.sin_port = htons(args->mount_server.port); | ||
1040 | 1011 | ||
1041 | /* | 1012 | /* |
1042 | * Now ask the mount server to map our export path | 1013 | * Now ask the mount server to map our export path |
@@ -1049,14 +1020,11 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1049 | args->mount_server.version, | 1020 | args->mount_server.version, |
1050 | args->mount_server.protocol, | 1021 | args->mount_server.protocol, |
1051 | root_fh); | 1022 | root_fh); |
1052 | if (status < 0) | 1023 | if (status == 0) |
1053 | goto out_err; | 1024 | return 0; |
1054 | |||
1055 | return status; | ||
1056 | 1025 | ||
1057 | out_err: | 1026 | dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT |
1058 | dfprintk(MOUNT, "NFS: unable to contact server on host " | 1027 | ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status); |
1059 | NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); | ||
1060 | return status; | 1028 | return status; |
1061 | } | 1029 | } |
1062 | 1030 | ||
@@ -1079,15 +1047,31 @@ out_err: | |||
1079 | * XXX: as far as I can tell, changing the NFS program number is not | 1047 | * XXX: as far as I can tell, changing the NFS program number is not |
1080 | * supported in the NFS client. | 1048 | * supported in the NFS client. |
1081 | */ | 1049 | */ |
1082 | static int nfs_validate_mount_data(struct nfs_mount_data **options, | 1050 | static int nfs_validate_mount_data(void *options, |
1051 | struct nfs_parsed_mount_data *args, | ||
1083 | struct nfs_fh *mntfh, | 1052 | struct nfs_fh *mntfh, |
1084 | const char *dev_name) | 1053 | const char *dev_name) |
1085 | { | 1054 | { |
1086 | struct nfs_mount_data *data = *options; | 1055 | struct nfs_mount_data *data = (struct nfs_mount_data *)options; |
1087 | 1056 | ||
1088 | if (data == NULL) | 1057 | if (data == NULL) |
1089 | goto out_no_data; | 1058 | goto out_no_data; |
1090 | 1059 | ||
1060 | memset(args, 0, sizeof(*args)); | ||
1061 | args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); | ||
1062 | args->rsize = NFS_MAX_FILE_IO_SIZE; | ||
1063 | args->wsize = NFS_MAX_FILE_IO_SIZE; | ||
1064 | args->timeo = 600; | ||
1065 | args->retrans = 2; | ||
1066 | args->acregmin = 3; | ||
1067 | args->acregmax = 60; | ||
1068 | args->acdirmin = 30; | ||
1069 | args->acdirmax = 60; | ||
1070 | args->mount_server.protocol = XPRT_TRANSPORT_UDP; | ||
1071 | args->mount_server.program = NFS_MNT_PROGRAM; | ||
1072 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | ||
1073 | args->nfs_server.program = NFS_PROGRAM; | ||
1074 | |||
1091 | switch (data->version) { | 1075 | switch (data->version) { |
1092 | case 1: | 1076 | case 1: |
1093 | data->namlen = 0; | 1077 | data->namlen = 0; |
@@ -1116,92 +1100,73 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, | |||
1116 | if (mntfh->size < sizeof(mntfh->data)) | 1100 | if (mntfh->size < sizeof(mntfh->data)) |
1117 | memset(mntfh->data + mntfh->size, 0, | 1101 | memset(mntfh->data + mntfh->size, 0, |
1118 | sizeof(mntfh->data) - mntfh->size); | 1102 | sizeof(mntfh->data) - mntfh->size); |
1103 | |||
1104 | if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) | ||
1105 | goto out_no_address; | ||
1106 | |||
1107 | /* | ||
1108 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | ||
1109 | * can deal with. | ||
1110 | */ | ||
1111 | args->flags = data->flags; | ||
1112 | args->rsize = data->rsize; | ||
1113 | args->wsize = data->wsize; | ||
1114 | args->flags = data->flags; | ||
1115 | args->timeo = data->timeo; | ||
1116 | args->retrans = data->retrans; | ||
1117 | args->acregmin = data->acregmin; | ||
1118 | args->acregmax = data->acregmax; | ||
1119 | args->acdirmin = data->acdirmin; | ||
1120 | args->acdirmax = data->acdirmax; | ||
1121 | args->nfs_server.address = data->addr; | ||
1122 | if (!(data->flags & NFS_MOUNT_TCP)) | ||
1123 | args->nfs_server.protocol = XPRT_TRANSPORT_UDP; | ||
1124 | /* N.B. caller will free nfs_server.hostname in all cases */ | ||
1125 | args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); | ||
1126 | args->namlen = data->namlen; | ||
1127 | args->bsize = data->bsize; | ||
1128 | args->auth_flavors[0] = data->pseudoflavor; | ||
1119 | break; | 1129 | break; |
1120 | default: { | 1130 | default: { |
1121 | unsigned int len; | 1131 | unsigned int len; |
1122 | char *c; | 1132 | char *c; |
1123 | int status; | 1133 | int status; |
1124 | struct nfs_parsed_mount_data args = { | ||
1125 | .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), | ||
1126 | .rsize = NFS_MAX_FILE_IO_SIZE, | ||
1127 | .wsize = NFS_MAX_FILE_IO_SIZE, | ||
1128 | .timeo = 600, | ||
1129 | .retrans = 2, | ||
1130 | .acregmin = 3, | ||
1131 | .acregmax = 60, | ||
1132 | .acdirmin = 30, | ||
1133 | .acdirmax = 60, | ||
1134 | .mount_server.protocol = IPPROTO_UDP, | ||
1135 | .mount_server.program = NFS_MNT_PROGRAM, | ||
1136 | .nfs_server.protocol = IPPROTO_TCP, | ||
1137 | .nfs_server.program = NFS_PROGRAM, | ||
1138 | }; | ||
1139 | |||
1140 | if (nfs_parse_mount_options((char *) *options, &args) == 0) | ||
1141 | return -EINVAL; | ||
1142 | 1134 | ||
1143 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 1135 | if (nfs_parse_mount_options((char *)options, args) == 0) |
1144 | if (data == NULL) | 1136 | return -EINVAL; |
1145 | return -ENOMEM; | ||
1146 | 1137 | ||
1147 | /* | 1138 | if (!nfs_verify_server_address((struct sockaddr *) |
1148 | * NB: after this point, caller will free "data" | 1139 | &args->nfs_server.address)) |
1149 | * if we return an error | 1140 | goto out_no_address; |
1150 | */ | ||
1151 | *options = data; | ||
1152 | 1141 | ||
1153 | c = strchr(dev_name, ':'); | 1142 | c = strchr(dev_name, ':'); |
1154 | if (c == NULL) | 1143 | if (c == NULL) |
1155 | return -EINVAL; | 1144 | return -EINVAL; |
1156 | len = c - dev_name; | 1145 | len = c - dev_name; |
1157 | if (len > sizeof(data->hostname)) | 1146 | /* N.B. caller will free nfs_server.hostname in all cases */ |
1158 | return -ENAMETOOLONG; | 1147 | args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); |
1159 | strncpy(data->hostname, dev_name, len); | ||
1160 | args.nfs_server.hostname = data->hostname; | ||
1161 | 1148 | ||
1162 | c++; | 1149 | c++; |
1163 | if (strlen(c) > NFS_MAXPATHLEN) | 1150 | if (strlen(c) > NFS_MAXPATHLEN) |
1164 | return -ENAMETOOLONG; | 1151 | return -ENAMETOOLONG; |
1165 | args.nfs_server.export_path = c; | 1152 | args->nfs_server.export_path = c; |
1166 | 1153 | ||
1167 | status = nfs_try_mount(&args, mntfh); | 1154 | status = nfs_try_mount(args, mntfh); |
1168 | if (status) | 1155 | if (status) |
1169 | return status; | 1156 | return status; |
1170 | 1157 | ||
1171 | /* | ||
1172 | * Translate to nfs_mount_data, which nfs_fill_super | ||
1173 | * can deal with. | ||
1174 | */ | ||
1175 | data->version = 6; | ||
1176 | data->flags = args.flags; | ||
1177 | data->rsize = args.rsize; | ||
1178 | data->wsize = args.wsize; | ||
1179 | data->timeo = args.timeo; | ||
1180 | data->retrans = args.retrans; | ||
1181 | data->acregmin = args.acregmin; | ||
1182 | data->acregmax = args.acregmax; | ||
1183 | data->acdirmin = args.acdirmin; | ||
1184 | data->acdirmax = args.acdirmax; | ||
1185 | data->addr = args.nfs_server.address; | ||
1186 | data->namlen = args.namlen; | ||
1187 | data->bsize = args.bsize; | ||
1188 | data->pseudoflavor = args.auth_flavors[0]; | ||
1189 | |||
1190 | break; | 1158 | break; |
1191 | } | 1159 | } |
1192 | } | 1160 | } |
1193 | 1161 | ||
1194 | if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) | 1162 | if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) |
1195 | data->pseudoflavor = RPC_AUTH_UNIX; | 1163 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
1196 | 1164 | ||
1197 | #ifndef CONFIG_NFS_V3 | 1165 | #ifndef CONFIG_NFS_V3 |
1198 | if (data->flags & NFS_MOUNT_VER3) | 1166 | if (args->flags & NFS_MOUNT_VER3) |
1199 | goto out_v3_not_compiled; | 1167 | goto out_v3_not_compiled; |
1200 | #endif /* !CONFIG_NFS_V3 */ | 1168 | #endif /* !CONFIG_NFS_V3 */ |
1201 | 1169 | ||
1202 | if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) | ||
1203 | goto out_no_address; | ||
1204 | |||
1205 | return 0; | 1170 | return 0; |
1206 | 1171 | ||
1207 | out_no_data: | 1172 | out_no_data: |
@@ -1258,7 +1223,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
1258 | /* | 1223 | /* |
1259 | * Finish setting up an NFS2/3 superblock | 1224 | * Finish setting up an NFS2/3 superblock |
1260 | */ | 1225 | */ |
1261 | static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) | 1226 | static void nfs_fill_super(struct super_block *sb, |
1227 | struct nfs_parsed_mount_data *data) | ||
1262 | { | 1228 | { |
1263 | struct nfs_server *server = NFS_SB(sb); | 1229 | struct nfs_server *server = NFS_SB(sb); |
1264 | 1230 | ||
@@ -1379,7 +1345,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
1379 | struct nfs_server *server = NULL; | 1345 | struct nfs_server *server = NULL; |
1380 | struct super_block *s; | 1346 | struct super_block *s; |
1381 | struct nfs_fh mntfh; | 1347 | struct nfs_fh mntfh; |
1382 | struct nfs_mount_data *data = raw_data; | 1348 | struct nfs_parsed_mount_data data; |
1383 | struct dentry *mntroot; | 1349 | struct dentry *mntroot; |
1384 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; | 1350 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; |
1385 | struct nfs_sb_mountdata sb_mntdata = { | 1351 | struct nfs_sb_mountdata sb_mntdata = { |
@@ -1388,12 +1354,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
1388 | int error; | 1354 | int error; |
1389 | 1355 | ||
1390 | /* Validate the mount data */ | 1356 | /* Validate the mount data */ |
1391 | error = nfs_validate_mount_data(&data, &mntfh, dev_name); | 1357 | error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); |
1392 | if (error < 0) | 1358 | if (error < 0) |
1393 | goto out; | 1359 | goto out; |
1394 | 1360 | ||
1395 | /* Get a volume representation */ | 1361 | /* Get a volume representation */ |
1396 | server = nfs_create_server(data, &mntfh); | 1362 | server = nfs_create_server(&data, &mntfh); |
1397 | if (IS_ERR(server)) { | 1363 | if (IS_ERR(server)) { |
1398 | error = PTR_ERR(server); | 1364 | error = PTR_ERR(server); |
1399 | goto out; | 1365 | goto out; |
@@ -1417,7 +1383,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
1417 | 1383 | ||
1418 | if (!s->s_root) { | 1384 | if (!s->s_root) { |
1419 | /* initial superblock/root creation */ | 1385 | /* initial superblock/root creation */ |
1420 | nfs_fill_super(s, data); | 1386 | nfs_fill_super(s, &data); |
1421 | } | 1387 | } |
1422 | 1388 | ||
1423 | mntroot = nfs_get_root(s, &mntfh); | 1389 | mntroot = nfs_get_root(s, &mntfh); |
@@ -1432,8 +1398,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
1432 | error = 0; | 1398 | error = 0; |
1433 | 1399 | ||
1434 | out: | 1400 | out: |
1435 | if (data != raw_data) | 1401 | kfree(data.nfs_server.hostname); |
1436 | kfree(data); | ||
1437 | return error; | 1402 | return error; |
1438 | 1403 | ||
1439 | out_err_nosb: | 1404 | out_err_nosb: |
@@ -1559,38 +1524,49 @@ static void nfs4_fill_super(struct super_block *sb) | |||
1559 | /* | 1524 | /* |
1560 | * Validate NFSv4 mount options | 1525 | * Validate NFSv4 mount options |
1561 | */ | 1526 | */ |
1562 | static int nfs4_validate_mount_data(struct nfs4_mount_data **options, | 1527 | static int nfs4_validate_mount_data(void *options, |
1563 | const char *dev_name, | 1528 | struct nfs_parsed_mount_data *args, |
1564 | struct sockaddr_in *addr, | 1529 | const char *dev_name) |
1565 | rpc_authflavor_t *authflavour, | ||
1566 | char **hostname, | ||
1567 | char **mntpath, | ||
1568 | char **ip_addr) | ||
1569 | { | 1530 | { |
1570 | struct nfs4_mount_data *data = *options; | 1531 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; |
1571 | char *c; | 1532 | char *c; |
1572 | 1533 | ||
1573 | if (data == NULL) | 1534 | if (data == NULL) |
1574 | goto out_no_data; | 1535 | goto out_no_data; |
1575 | 1536 | ||
1537 | memset(args, 0, sizeof(*args)); | ||
1538 | args->rsize = NFS_MAX_FILE_IO_SIZE; | ||
1539 | args->wsize = NFS_MAX_FILE_IO_SIZE; | ||
1540 | args->timeo = 600; | ||
1541 | args->retrans = 2; | ||
1542 | args->acregmin = 3; | ||
1543 | args->acregmax = 60; | ||
1544 | args->acdirmin = 30; | ||
1545 | args->acdirmax = 60; | ||
1546 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | ||
1547 | |||
1576 | switch (data->version) { | 1548 | switch (data->version) { |
1577 | case 1: | 1549 | case 1: |
1578 | if (data->host_addrlen != sizeof(*addr)) | 1550 | if (data->host_addrlen != sizeof(args->nfs_server.address)) |
1579 | goto out_no_address; | 1551 | goto out_no_address; |
1580 | if (copy_from_user(addr, data->host_addr, sizeof(*addr))) | 1552 | if (copy_from_user(&args->nfs_server.address, |
1553 | data->host_addr, | ||
1554 | sizeof(args->nfs_server.address))) | ||
1581 | return -EFAULT; | 1555 | return -EFAULT; |
1582 | if (addr->sin_port == 0) | 1556 | if (args->nfs_server.address.sin_port == 0) |
1583 | addr->sin_port = htons(NFS_PORT); | 1557 | args->nfs_server.address.sin_port = htons(NFS_PORT); |
1584 | if (!nfs_verify_server_address((struct sockaddr *) addr)) | 1558 | if (!nfs_verify_server_address((struct sockaddr *) |
1559 | &args->nfs_server.address)) | ||
1585 | goto out_no_address; | 1560 | goto out_no_address; |
1586 | 1561 | ||
1587 | switch (data->auth_flavourlen) { | 1562 | switch (data->auth_flavourlen) { |
1588 | case 0: | 1563 | case 0: |
1589 | *authflavour = RPC_AUTH_UNIX; | 1564 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
1590 | break; | 1565 | break; |
1591 | case 1: | 1566 | case 1: |
1592 | if (copy_from_user(authflavour, data->auth_flavours, | 1567 | if (copy_from_user(&args->auth_flavors[0], |
1593 | sizeof(*authflavour))) | 1568 | data->auth_flavours, |
1569 | sizeof(args->auth_flavors[0]))) | ||
1594 | return -EFAULT; | 1570 | return -EFAULT; |
1595 | break; | 1571 | break; |
1596 | default: | 1572 | default: |
@@ -1600,75 +1576,57 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, | |||
1600 | c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); | 1576 | c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); |
1601 | if (IS_ERR(c)) | 1577 | if (IS_ERR(c)) |
1602 | return PTR_ERR(c); | 1578 | return PTR_ERR(c); |
1603 | *hostname = c; | 1579 | args->nfs_server.hostname = c; |
1604 | 1580 | ||
1605 | c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); | 1581 | c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); |
1606 | if (IS_ERR(c)) | 1582 | if (IS_ERR(c)) |
1607 | return PTR_ERR(c); | 1583 | return PTR_ERR(c); |
1608 | *mntpath = c; | 1584 | args->nfs_server.export_path = c; |
1609 | dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); | 1585 | dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); |
1610 | 1586 | ||
1611 | c = strndup_user(data->client_addr.data, 16); | 1587 | c = strndup_user(data->client_addr.data, 16); |
1612 | if (IS_ERR(c)) | 1588 | if (IS_ERR(c)) |
1613 | return PTR_ERR(c); | 1589 | return PTR_ERR(c); |
1614 | *ip_addr = c; | 1590 | args->client_address = c; |
1591 | |||
1592 | /* | ||
1593 | * Translate to nfs_parsed_mount_data, which nfs4_fill_super | ||
1594 | * can deal with. | ||
1595 | */ | ||
1596 | |||
1597 | args->flags = data->flags & NFS4_MOUNT_FLAGMASK; | ||
1598 | args->rsize = data->rsize; | ||
1599 | args->wsize = data->wsize; | ||
1600 | args->timeo = data->timeo; | ||
1601 | args->retrans = data->retrans; | ||
1602 | args->acregmin = data->acregmin; | ||
1603 | args->acregmax = data->acregmax; | ||
1604 | args->acdirmin = data->acdirmin; | ||
1605 | args->acdirmax = data->acdirmax; | ||
1606 | args->nfs_server.protocol = data->proto; | ||
1615 | 1607 | ||
1616 | break; | 1608 | break; |
1617 | default: { | 1609 | default: { |
1618 | unsigned int len; | 1610 | unsigned int len; |
1619 | struct nfs_parsed_mount_data args = { | 1611 | |
1620 | .rsize = NFS_MAX_FILE_IO_SIZE, | 1612 | if (nfs_parse_mount_options((char *)options, args) == 0) |
1621 | .wsize = NFS_MAX_FILE_IO_SIZE, | ||
1622 | .timeo = 600, | ||
1623 | .retrans = 2, | ||
1624 | .acregmin = 3, | ||
1625 | .acregmax = 60, | ||
1626 | .acdirmin = 30, | ||
1627 | .acdirmax = 60, | ||
1628 | .nfs_server.protocol = IPPROTO_TCP, | ||
1629 | }; | ||
1630 | |||
1631 | if (nfs_parse_mount_options((char *) *options, &args) == 0) | ||
1632 | return -EINVAL; | 1613 | return -EINVAL; |
1633 | 1614 | ||
1634 | if (!nfs_verify_server_address((struct sockaddr *) | 1615 | if (!nfs_verify_server_address((struct sockaddr *) |
1635 | &args.nfs_server.address)) | 1616 | &args->nfs_server.address)) |
1636 | return -EINVAL; | 1617 | return -EINVAL; |
1637 | *addr = args.nfs_server.address; | ||
1638 | 1618 | ||
1639 | switch (args.auth_flavor_len) { | 1619 | switch (args->auth_flavor_len) { |
1640 | case 0: | 1620 | case 0: |
1641 | *authflavour = RPC_AUTH_UNIX; | 1621 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
1642 | break; | 1622 | break; |
1643 | case 1: | 1623 | case 1: |
1644 | *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; | ||
1645 | break; | 1624 | break; |
1646 | default: | 1625 | default: |
1647 | goto out_inval_auth; | 1626 | goto out_inval_auth; |
1648 | } | 1627 | } |
1649 | 1628 | ||
1650 | /* | 1629 | /* |
1651 | * Translate to nfs4_mount_data, which nfs4_fill_super | ||
1652 | * can deal with. | ||
1653 | */ | ||
1654 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
1655 | if (data == NULL) | ||
1656 | return -ENOMEM; | ||
1657 | *options = data; | ||
1658 | |||
1659 | data->version = 1; | ||
1660 | data->flags = args.flags & NFS4_MOUNT_FLAGMASK; | ||
1661 | data->rsize = args.rsize; | ||
1662 | data->wsize = args.wsize; | ||
1663 | data->timeo = args.timeo; | ||
1664 | data->retrans = args.retrans; | ||
1665 | data->acregmin = args.acregmin; | ||
1666 | data->acregmax = args.acregmax; | ||
1667 | data->acdirmin = args.acdirmin; | ||
1668 | data->acdirmax = args.acdirmax; | ||
1669 | data->proto = args.nfs_server.protocol; | ||
1670 | |||
1671 | /* | ||
1672 | * Split "dev_name" into "hostname:mntpath". | 1630 | * Split "dev_name" into "hostname:mntpath". |
1673 | */ | 1631 | */ |
1674 | c = strchr(dev_name, ':'); | 1632 | c = strchr(dev_name, ':'); |
@@ -1678,27 +1636,25 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, | |||
1678 | len = c - dev_name; | 1636 | len = c - dev_name; |
1679 | if (len > NFS4_MAXNAMLEN) | 1637 | if (len > NFS4_MAXNAMLEN) |
1680 | return -ENAMETOOLONG; | 1638 | return -ENAMETOOLONG; |
1681 | *hostname = kzalloc(len, GFP_KERNEL); | 1639 | args->nfs_server.hostname = kzalloc(len, GFP_KERNEL); |
1682 | if (*hostname == NULL) | 1640 | if (args->nfs_server.hostname == NULL) |
1683 | return -ENOMEM; | 1641 | return -ENOMEM; |
1684 | strncpy(*hostname, dev_name, len - 1); | 1642 | strncpy(args->nfs_server.hostname, dev_name, len - 1); |
1685 | 1643 | ||
1686 | c++; /* step over the ':' */ | 1644 | c++; /* step over the ':' */ |
1687 | len = strlen(c); | 1645 | len = strlen(c); |
1688 | if (len > NFS4_MAXPATHLEN) | 1646 | if (len > NFS4_MAXPATHLEN) |
1689 | return -ENAMETOOLONG; | 1647 | return -ENAMETOOLONG; |
1690 | *mntpath = kzalloc(len + 1, GFP_KERNEL); | 1648 | args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL); |
1691 | if (*mntpath == NULL) | 1649 | if (args->nfs_server.export_path == NULL) |
1692 | return -ENOMEM; | 1650 | return -ENOMEM; |
1693 | strncpy(*mntpath, c, len); | 1651 | strncpy(args->nfs_server.export_path, c, len); |
1694 | 1652 | ||
1695 | dprintk("MNTPATH: %s\n", *mntpath); | 1653 | dprintk("MNTPATH: %s\n", args->nfs_server.export_path); |
1696 | 1654 | ||
1697 | if (args.client_address == NULL) | 1655 | if (args->client_address == NULL) |
1698 | goto out_no_client_address; | 1656 | goto out_no_client_address; |
1699 | 1657 | ||
1700 | *ip_addr = args.client_address; | ||
1701 | |||
1702 | break; | 1658 | break; |
1703 | } | 1659 | } |
1704 | } | 1660 | } |
@@ -1729,14 +1685,11 @@ out_no_client_address: | |||
1729 | static int nfs4_get_sb(struct file_system_type *fs_type, | 1685 | static int nfs4_get_sb(struct file_system_type *fs_type, |
1730 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) | 1686 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) |
1731 | { | 1687 | { |
1732 | struct nfs4_mount_data *data = raw_data; | 1688 | struct nfs_parsed_mount_data data; |
1733 | struct super_block *s; | 1689 | struct super_block *s; |
1734 | struct nfs_server *server; | 1690 | struct nfs_server *server; |
1735 | struct sockaddr_in addr; | ||
1736 | rpc_authflavor_t authflavour; | ||
1737 | struct nfs_fh mntfh; | 1691 | struct nfs_fh mntfh; |
1738 | struct dentry *mntroot; | 1692 | struct dentry *mntroot; |
1739 | char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; | ||
1740 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; | 1693 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; |
1741 | struct nfs_sb_mountdata sb_mntdata = { | 1694 | struct nfs_sb_mountdata sb_mntdata = { |
1742 | .mntflags = flags, | 1695 | .mntflags = flags, |
@@ -1744,14 +1697,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
1744 | int error; | 1697 | int error; |
1745 | 1698 | ||
1746 | /* Validate the mount data */ | 1699 | /* Validate the mount data */ |
1747 | error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, | 1700 | error = nfs4_validate_mount_data(raw_data, &data, dev_name); |
1748 | &hostname, &mntpath, &ip_addr); | ||
1749 | if (error < 0) | 1701 | if (error < 0) |
1750 | goto out; | 1702 | goto out; |
1751 | 1703 | ||
1752 | /* Get a volume representation */ | 1704 | /* Get a volume representation */ |
1753 | server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, | 1705 | server = nfs4_create_server(&data, &mntfh); |
1754 | authflavour, &mntfh); | ||
1755 | if (IS_ERR(server)) { | 1706 | if (IS_ERR(server)) { |
1756 | error = PTR_ERR(server); | 1707 | error = PTR_ERR(server); |
1757 | goto out; | 1708 | goto out; |
@@ -1790,9 +1741,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
1790 | error = 0; | 1741 | error = 0; |
1791 | 1742 | ||
1792 | out: | 1743 | out: |
1793 | kfree(ip_addr); | 1744 | kfree(data.client_address); |
1794 | kfree(mntpath); | 1745 | kfree(data.nfs_server.export_path); |
1795 | kfree(hostname); | 1746 | kfree(data.nfs_server.hostname); |
1796 | return error; | 1747 | return error; |
1797 | 1748 | ||
1798 | out_free: | 1749 | out_free: |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 045ab805c17f..1aed850d18f2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -66,7 +66,6 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) | |||
66 | .rpc_cred = data->cred, | 66 | .rpc_cred = data->cred, |
67 | }; | 67 | }; |
68 | 68 | ||
69 | nfs_begin_data_update(dir); | ||
70 | NFS_PROTO(dir)->unlink_setup(&msg, dir); | 69 | NFS_PROTO(dir)->unlink_setup(&msg, dir); |
71 | rpc_call_setup(task, &msg, 0); | 70 | rpc_call_setup(task, &msg, 0); |
72 | } | 71 | } |
@@ -84,8 +83,6 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |||
84 | 83 | ||
85 | if (!NFS_PROTO(dir)->unlink_done(task, dir)) | 84 | if (!NFS_PROTO(dir)->unlink_done(task, dir)) |
86 | rpc_restart_call(task); | 85 | rpc_restart_call(task); |
87 | else | ||
88 | nfs_end_data_update(dir); | ||
89 | } | 86 | } |
90 | 87 | ||
91 | /** | 88 | /** |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0d7a77cc394b..e2bb66c34406 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata) | |||
110 | nfs_writedata_free(wdata); | 110 | nfs_writedata_free(wdata); |
111 | } | 111 | } |
112 | 112 | ||
113 | static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | ||
114 | { | ||
115 | ctx->error = error; | ||
116 | smp_wmb(); | ||
117 | set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
118 | } | ||
119 | |||
113 | static struct nfs_page *nfs_page_find_request_locked(struct page *page) | 120 | static struct nfs_page *nfs_page_find_request_locked(struct page *page) |
114 | { | 121 | { |
115 | struct nfs_page *req = NULL; | 122 | struct nfs_page *req = NULL; |
@@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struct page *page) | |||
243 | 250 | ||
244 | /* | 251 | /* |
245 | * Find an associated nfs write request, and prepare to flush it out | 252 | * Find an associated nfs write request, and prepare to flush it out |
246 | * Returns 1 if there was no write request, or if the request was | 253 | * May return an error if the user signalled nfs_wait_on_request(). |
247 | * already tagged by nfs_set_page_dirty.Returns 0 if the request | ||
248 | * was not tagged. | ||
249 | * May also return an error if the user signalled nfs_wait_on_request(). | ||
250 | */ | 254 | */ |
251 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | 255 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, |
252 | struct page *page) | 256 | struct page *page) |
@@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
261 | req = nfs_page_find_request_locked(page); | 265 | req = nfs_page_find_request_locked(page); |
262 | if (req == NULL) { | 266 | if (req == NULL) { |
263 | spin_unlock(&inode->i_lock); | 267 | spin_unlock(&inode->i_lock); |
264 | return 1; | 268 | return 0; |
265 | } | 269 | } |
266 | if (nfs_lock_request_dontget(req)) | 270 | if (nfs_lock_request_dontget(req)) |
267 | break; | 271 | break; |
@@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
282 | spin_unlock(&inode->i_lock); | 286 | spin_unlock(&inode->i_lock); |
283 | nfs_unlock_request(req); | 287 | nfs_unlock_request(req); |
284 | nfs_pageio_complete(pgio); | 288 | nfs_pageio_complete(pgio); |
285 | return 1; | 289 | return 0; |
286 | } | 290 | } |
287 | if (nfs_set_page_writeback(page) != 0) { | 291 | if (nfs_set_page_writeback(page) != 0) { |
288 | spin_unlock(&inode->i_lock); | 292 | spin_unlock(&inode->i_lock); |
@@ -290,70 +294,56 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
290 | } | 294 | } |
291 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, | 295 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, |
292 | NFS_PAGE_TAG_LOCKED); | 296 | NFS_PAGE_TAG_LOCKED); |
293 | ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); | ||
294 | spin_unlock(&inode->i_lock); | 297 | spin_unlock(&inode->i_lock); |
295 | nfs_pageio_add_request(pgio, req); | 298 | nfs_pageio_add_request(pgio, req); |
296 | return ret; | 299 | return 0; |
297 | } | 300 | } |
298 | 301 | ||
299 | /* | 302 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) |
300 | * Write an mmapped page to the server. | ||
301 | */ | ||
302 | static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) | ||
303 | { | 303 | { |
304 | struct nfs_pageio_descriptor mypgio, *pgio; | ||
305 | struct nfs_open_context *ctx; | ||
306 | struct inode *inode = page->mapping->host; | 304 | struct inode *inode = page->mapping->host; |
307 | unsigned offset; | ||
308 | int err; | ||
309 | 305 | ||
310 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); | 306 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); |
311 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); | 307 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); |
312 | 308 | ||
313 | if (wbc->for_writepages) | ||
314 | pgio = wbc->fs_private; | ||
315 | else { | ||
316 | nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); | ||
317 | pgio = &mypgio; | ||
318 | } | ||
319 | |||
320 | nfs_pageio_cond_complete(pgio, page->index); | 309 | nfs_pageio_cond_complete(pgio, page->index); |
310 | return nfs_page_async_flush(pgio, page); | ||
311 | } | ||
321 | 312 | ||
322 | err = nfs_page_async_flush(pgio, page); | 313 | /* |
323 | if (err <= 0) | 314 | * Write an mmapped page to the server. |
324 | goto out; | 315 | */ |
325 | err = 0; | 316 | static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) |
326 | offset = nfs_page_length(page); | 317 | { |
327 | if (!offset) | 318 | struct nfs_pageio_descriptor pgio; |
328 | goto out; | 319 | int err; |
329 | |||
330 | nfs_pageio_cond_complete(pgio, page->index); | ||
331 | 320 | ||
332 | ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); | 321 | nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); |
333 | if (ctx == NULL) { | 322 | err = nfs_do_writepage(page, wbc, &pgio); |
334 | err = -EBADF; | 323 | nfs_pageio_complete(&pgio); |
335 | goto out; | 324 | if (err < 0) |
336 | } | 325 | return err; |
337 | err = nfs_writepage_setup(ctx, page, 0, offset); | 326 | if (pgio.pg_error < 0) |
338 | put_nfs_open_context(ctx); | 327 | return pgio.pg_error; |
339 | if (err != 0) | 328 | return 0; |
340 | goto out; | ||
341 | err = nfs_page_async_flush(pgio, page); | ||
342 | if (err > 0) | ||
343 | err = 0; | ||
344 | out: | ||
345 | if (!wbc->for_writepages) | ||
346 | nfs_pageio_complete(pgio); | ||
347 | return err; | ||
348 | } | 329 | } |
349 | 330 | ||
350 | int nfs_writepage(struct page *page, struct writeback_control *wbc) | 331 | int nfs_writepage(struct page *page, struct writeback_control *wbc) |
351 | { | 332 | { |
352 | int err; | 333 | int ret; |
334 | |||
335 | ret = nfs_writepage_locked(page, wbc); | ||
336 | unlock_page(page); | ||
337 | return ret; | ||
338 | } | ||
339 | |||
340 | static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) | ||
341 | { | ||
342 | int ret; | ||
353 | 343 | ||
354 | err = nfs_writepage_locked(page, wbc); | 344 | ret = nfs_do_writepage(page, wbc, data); |
355 | unlock_page(page); | 345 | unlock_page(page); |
356 | return err; | 346 | return ret; |
357 | } | 347 | } |
358 | 348 | ||
359 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | 349 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) |
@@ -365,12 +355,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
365 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); | 355 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); |
366 | 356 | ||
367 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); | 357 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); |
368 | wbc->fs_private = &pgio; | 358 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); |
369 | err = generic_writepages(mapping, wbc); | ||
370 | nfs_pageio_complete(&pgio); | 359 | nfs_pageio_complete(&pgio); |
371 | if (err) | 360 | if (err < 0) |
372 | return err; | 361 | return err; |
373 | if (pgio.pg_error) | 362 | if (pgio.pg_error < 0) |
374 | return pgio.pg_error; | 363 | return pgio.pg_error; |
375 | return 0; | 364 | return 0; |
376 | } | 365 | } |
@@ -389,14 +378,11 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
389 | return error; | 378 | return error; |
390 | if (!nfsi->npages) { | 379 | if (!nfsi->npages) { |
391 | igrab(inode); | 380 | igrab(inode); |
392 | nfs_begin_data_update(inode); | ||
393 | if (nfs_have_delegation(inode, FMODE_WRITE)) | 381 | if (nfs_have_delegation(inode, FMODE_WRITE)) |
394 | nfsi->change_attr++; | 382 | nfsi->change_attr++; |
395 | } | 383 | } |
396 | SetPagePrivate(req->wb_page); | 384 | SetPagePrivate(req->wb_page); |
397 | set_page_private(req->wb_page, (unsigned long)req); | 385 | set_page_private(req->wb_page, (unsigned long)req); |
398 | if (PageDirty(req->wb_page)) | ||
399 | set_bit(PG_NEED_FLUSH, &req->wb_flags); | ||
400 | nfsi->npages++; | 386 | nfsi->npages++; |
401 | kref_get(&req->wb_kref); | 387 | kref_get(&req->wb_kref); |
402 | return 0; | 388 | return 0; |
@@ -416,12 +402,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
416 | set_page_private(req->wb_page, 0); | 402 | set_page_private(req->wb_page, 0); |
417 | ClearPagePrivate(req->wb_page); | 403 | ClearPagePrivate(req->wb_page); |
418 | radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); | 404 | radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); |
419 | if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) | ||
420 | __set_page_dirty_nobuffers(req->wb_page); | ||
421 | nfsi->npages--; | 405 | nfsi->npages--; |
422 | if (!nfsi->npages) { | 406 | if (!nfsi->npages) { |
423 | spin_unlock(&inode->i_lock); | 407 | spin_unlock(&inode->i_lock); |
424 | nfs_end_data_update(inode); | ||
425 | iput(inode); | 408 | iput(inode); |
426 | } else | 409 | } else |
427 | spin_unlock(&inode->i_lock); | 410 | spin_unlock(&inode->i_lock); |
@@ -682,7 +665,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | |||
682 | 665 | ||
683 | int nfs_flush_incompatible(struct file *file, struct page *page) | 666 | int nfs_flush_incompatible(struct file *file, struct page *page) |
684 | { | 667 | { |
685 | struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 668 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
686 | struct nfs_page *req; | 669 | struct nfs_page *req; |
687 | int do_flush, status; | 670 | int do_flush, status; |
688 | /* | 671 | /* |
@@ -716,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
716 | int nfs_updatepage(struct file *file, struct page *page, | 699 | int nfs_updatepage(struct file *file, struct page *page, |
717 | unsigned int offset, unsigned int count) | 700 | unsigned int offset, unsigned int count) |
718 | { | 701 | { |
719 | struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 702 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
720 | struct inode *inode = page->mapping->host; | 703 | struct inode *inode = page->mapping->host; |
721 | int status = 0; | 704 | int status = 0; |
722 | 705 | ||
@@ -967,7 +950,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) | |||
967 | 950 | ||
968 | if (task->tk_status < 0) { | 951 | if (task->tk_status < 0) { |
969 | nfs_set_pageerror(page); | 952 | nfs_set_pageerror(page); |
970 | req->wb_context->error = task->tk_status; | 953 | nfs_context_set_write_error(req->wb_context, task->tk_status); |
971 | dprintk(", error = %d\n", task->tk_status); | 954 | dprintk(", error = %d\n", task->tk_status); |
972 | goto out; | 955 | goto out; |
973 | } | 956 | } |
@@ -1030,7 +1013,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
1030 | 1013 | ||
1031 | if (task->tk_status < 0) { | 1014 | if (task->tk_status < 0) { |
1032 | nfs_set_pageerror(page); | 1015 | nfs_set_pageerror(page); |
1033 | req->wb_context->error = task->tk_status; | 1016 | nfs_context_set_write_error(req->wb_context, task->tk_status); |
1034 | dprintk(", error = %d\n", task->tk_status); | 1017 | dprintk(", error = %d\n", task->tk_status); |
1035 | goto remove_request; | 1018 | goto remove_request; |
1036 | } | 1019 | } |
@@ -1244,7 +1227,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) | |||
1244 | req->wb_bytes, | 1227 | req->wb_bytes, |
1245 | (long long)req_offset(req)); | 1228 | (long long)req_offset(req)); |
1246 | if (task->tk_status < 0) { | 1229 | if (task->tk_status < 0) { |
1247 | req->wb_context->error = task->tk_status; | 1230 | nfs_context_set_write_error(req->wb_context, task->tk_status); |
1248 | nfs_inode_remove_request(req); | 1231 | nfs_inode_remove_request(req); |
1249 | dprintk(", error = %d\n", task->tk_status); | 1232 | dprintk(", error = %d\n", task->tk_status); |
1250 | goto next; | 1233 | goto next; |
@@ -1347,53 +1330,52 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr | |||
1347 | return ret; | 1330 | return ret; |
1348 | } | 1331 | } |
1349 | 1332 | ||
1350 | /* | 1333 | static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) |
1351 | * flush the inode to disk. | ||
1352 | */ | ||
1353 | int nfs_wb_all(struct inode *inode) | ||
1354 | { | 1334 | { |
1355 | struct address_space *mapping = inode->i_mapping; | ||
1356 | struct writeback_control wbc = { | ||
1357 | .bdi = mapping->backing_dev_info, | ||
1358 | .sync_mode = WB_SYNC_ALL, | ||
1359 | .nr_to_write = LONG_MAX, | ||
1360 | .for_writepages = 1, | ||
1361 | .range_cyclic = 1, | ||
1362 | }; | ||
1363 | int ret; | 1335 | int ret; |
1364 | 1336 | ||
1365 | ret = nfs_writepages(mapping, &wbc); | 1337 | ret = nfs_writepages(mapping, wbc); |
1366 | if (ret < 0) | 1338 | if (ret < 0) |
1367 | goto out; | 1339 | goto out; |
1368 | ret = nfs_sync_mapping_wait(mapping, &wbc, 0); | 1340 | ret = nfs_sync_mapping_wait(mapping, wbc, how); |
1369 | if (ret >= 0) | 1341 | if (ret < 0) |
1370 | return 0; | 1342 | goto out; |
1343 | return 0; | ||
1371 | out: | 1344 | out: |
1372 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1345 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
1373 | return ret; | 1346 | return ret; |
1374 | } | 1347 | } |
1375 | 1348 | ||
1376 | int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) | 1349 | /* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ |
1350 | static int nfs_write_mapping(struct address_space *mapping, int how) | ||
1377 | { | 1351 | { |
1378 | struct writeback_control wbc = { | 1352 | struct writeback_control wbc = { |
1379 | .bdi = mapping->backing_dev_info, | 1353 | .bdi = mapping->backing_dev_info, |
1380 | .sync_mode = WB_SYNC_ALL, | 1354 | .sync_mode = WB_SYNC_NONE, |
1381 | .nr_to_write = LONG_MAX, | 1355 | .nr_to_write = LONG_MAX, |
1382 | .range_start = range_start, | ||
1383 | .range_end = range_end, | ||
1384 | .for_writepages = 1, | 1356 | .for_writepages = 1, |
1357 | .range_cyclic = 1, | ||
1385 | }; | 1358 | }; |
1386 | int ret; | 1359 | int ret; |
1387 | 1360 | ||
1388 | ret = nfs_writepages(mapping, &wbc); | 1361 | ret = __nfs_write_mapping(mapping, &wbc, how); |
1389 | if (ret < 0) | 1362 | if (ret < 0) |
1390 | goto out; | 1363 | return ret; |
1391 | ret = nfs_sync_mapping_wait(mapping, &wbc, how); | 1364 | wbc.sync_mode = WB_SYNC_ALL; |
1392 | if (ret >= 0) | 1365 | return __nfs_write_mapping(mapping, &wbc, how); |
1393 | return 0; | 1366 | } |
1394 | out: | 1367 | |
1395 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1368 | /* |
1396 | return ret; | 1369 | * flush the inode to disk. |
1370 | */ | ||
1371 | int nfs_wb_all(struct inode *inode) | ||
1372 | { | ||
1373 | return nfs_write_mapping(inode->i_mapping, 0); | ||
1374 | } | ||
1375 | |||
1376 | int nfs_wb_nocommit(struct inode *inode) | ||
1377 | { | ||
1378 | return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); | ||
1397 | } | 1379 | } |
1398 | 1380 | ||
1399 | int nfs_wb_page_cancel(struct inode *inode, struct page *page) | 1381 | int nfs_wb_page_cancel(struct inode *inode, struct page *page) |
@@ -1477,35 +1459,6 @@ int nfs_wb_page(struct inode *inode, struct page* page) | |||
1477 | return nfs_wb_page_priority(inode, page, FLUSH_STABLE); | 1459 | return nfs_wb_page_priority(inode, page, FLUSH_STABLE); |
1478 | } | 1460 | } |
1479 | 1461 | ||
1480 | int nfs_set_page_dirty(struct page *page) | ||
1481 | { | ||
1482 | struct address_space *mapping = page->mapping; | ||
1483 | struct inode *inode; | ||
1484 | struct nfs_page *req; | ||
1485 | int ret; | ||
1486 | |||
1487 | if (!mapping) | ||
1488 | goto out_raced; | ||
1489 | inode = mapping->host; | ||
1490 | if (!inode) | ||
1491 | goto out_raced; | ||
1492 | spin_lock(&inode->i_lock); | ||
1493 | req = nfs_page_find_request_locked(page); | ||
1494 | if (req != NULL) { | ||
1495 | /* Mark any existing write requests for flushing */ | ||
1496 | ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); | ||
1497 | spin_unlock(&inode->i_lock); | ||
1498 | nfs_release_request(req); | ||
1499 | return ret; | ||
1500 | } | ||
1501 | ret = __set_page_dirty_nobuffers(page); | ||
1502 | spin_unlock(&inode->i_lock); | ||
1503 | return ret; | ||
1504 | out_raced: | ||
1505 | return !TestSetPageDirty(page); | ||
1506 | } | ||
1507 | |||
1508 | |||
1509 | int __init nfs_init_writepagecache(void) | 1462 | int __init nfs_init_writepagecache(void) |
1510 | { | 1463 | { |
1511 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 1464 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e15f2cf8ac15..57333944af7f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -102,7 +102,8 @@ check_filename(char *str, int len, __be32 err) | |||
102 | out: \ | 102 | out: \ |
103 | return status; \ | 103 | return status; \ |
104 | xdr_error: \ | 104 | xdr_error: \ |
105 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ | 105 | dprintk("NFSD: xdr error (%s:%d)\n", \ |
106 | __FILE__, __LINE__); \ | ||
106 | status = nfserr_bad_xdr; \ | 107 | status = nfserr_bad_xdr; \ |
107 | goto out | 108 | goto out |
108 | 109 | ||
@@ -124,7 +125,8 @@ xdr_error: \ | |||
124 | if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ | 125 | if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ |
125 | savemem(argp, p, nbytes) : \ | 126 | savemem(argp, p, nbytes) : \ |
126 | (char *)p)) { \ | 127 | (char *)p)) { \ |
127 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ | 128 | dprintk("NFSD: xdr error (%s:%d)\n", \ |
129 | __FILE__, __LINE__); \ | ||
128 | goto xdr_error; \ | 130 | goto xdr_error; \ |
129 | } \ | 131 | } \ |
130 | p += XDR_QUADLEN(nbytes); \ | 132 | p += XDR_QUADLEN(nbytes); \ |
@@ -140,7 +142,8 @@ xdr_error: \ | |||
140 | p = argp->p; \ | 142 | p = argp->p; \ |
141 | argp->p += XDR_QUADLEN(nbytes); \ | 143 | argp->p += XDR_QUADLEN(nbytes); \ |
142 | } else if (!(p = read_buf(argp, nbytes))) { \ | 144 | } else if (!(p = read_buf(argp, nbytes))) { \ |
143 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ | 145 | dprintk("NFSD: xdr error (%s:%d)\n", \ |
146 | __FILE__, __LINE__); \ | ||
144 | goto xdr_error; \ | 147 | goto xdr_error; \ |
145 | } \ | 148 | } \ |
146 | } while (0) | 149 | } while (0) |
@@ -948,7 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
948 | */ | 951 | */ |
949 | avail = (char*)argp->end - (char*)argp->p; | 952 | avail = (char*)argp->end - (char*)argp->p; |
950 | if (avail + argp->pagelen < write->wr_buflen) { | 953 | if (avail + argp->pagelen < write->wr_buflen) { |
951 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); | 954 | dprintk("NFSD: xdr error (%s:%d)\n", |
955 | __FILE__, __LINE__); | ||
952 | goto xdr_error; | 956 | goto xdr_error; |
953 | } | 957 | } |
954 | argp->rqstp->rq_vec[0].iov_base = p; | 958 | argp->rqstp->rq_vec[0].iov_base = p; |
@@ -1019,7 +1023,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1019 | argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); | 1023 | argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); |
1020 | if (!argp->ops) { | 1024 | if (!argp->ops) { |
1021 | argp->ops = argp->iops; | 1025 | argp->ops = argp->iops; |
1022 | printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n"); | 1026 | dprintk("nfsd: couldn't allocate room for COMPOUND\n"); |
1023 | goto xdr_error; | 1027 | goto xdr_error; |
1024 | } | 1028 | } |
1025 | } | 1029 | } |
@@ -1326,7 +1330,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * | |||
1326 | path = exp->ex_path; | 1330 | path = exp->ex_path; |
1327 | 1331 | ||
1328 | if (strncmp(path, rootpath, strlen(rootpath))) { | 1332 | if (strncmp(path, rootpath, strlen(rootpath))) { |
1329 | printk("nfsd: fs_locations failed;" | 1333 | dprintk("nfsd: fs_locations failed;" |
1330 | "%s is not contained in %s\n", path, rootpath); | 1334 | "%s is not contained in %s\n", path, rootpath); |
1331 | *stat = nfserr_notsupp; | 1335 | *stat = nfserr_notsupp; |
1332 | return NULL; | 1336 | return NULL; |
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d7a5e034c3a2..e757a74b9d17 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h | |||
@@ -109,6 +109,10 @@ static inline u64 get_jiffies_64(void) | |||
109 | ((long)(a) - (long)(b) >= 0)) | 109 | ((long)(a) - (long)(b) >= 0)) |
110 | #define time_before_eq(a,b) time_after_eq(b,a) | 110 | #define time_before_eq(a,b) time_after_eq(b,a) |
111 | 111 | ||
112 | #define time_in_range(a,b,c) \ | ||
113 | (time_after_eq(a,b) && \ | ||
114 | time_before_eq(a,c)) | ||
115 | |||
112 | /* Same as above, but does so with platform independent 64bit types. | 116 | /* Same as above, but does so with platform independent 64bit types. |
113 | * These must be used when utilizing jiffies_64 (i.e. return value of | 117 | * These must be used when utilizing jiffies_64 (i.e. return value of |
114 | * get_jiffies_64() */ | 118 | * get_jiffies_64() */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7250eeadd7b5..c5164c257f71 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -47,10 +47,8 @@ | |||
47 | #include <linux/nfs3.h> | 47 | #include <linux/nfs3.h> |
48 | #include <linux/nfs4.h> | 48 | #include <linux/nfs4.h> |
49 | #include <linux/nfs_xdr.h> | 49 | #include <linux/nfs_xdr.h> |
50 | |||
51 | #include <linux/nfs_fs_sb.h> | 50 | #include <linux/nfs_fs_sb.h> |
52 | 51 | ||
53 | #include <linux/rwsem.h> | ||
54 | #include <linux/mempool.h> | 52 | #include <linux/mempool.h> |
55 | 53 | ||
56 | /* | 54 | /* |
@@ -77,6 +75,9 @@ struct nfs_open_context { | |||
77 | struct nfs4_state *state; | 75 | struct nfs4_state *state; |
78 | fl_owner_t lockowner; | 76 | fl_owner_t lockowner; |
79 | int mode; | 77 | int mode; |
78 | |||
79 | unsigned long flags; | ||
80 | #define NFS_CONTEXT_ERROR_WRITE (0) | ||
80 | int error; | 81 | int error; |
81 | 82 | ||
82 | struct list_head list; | 83 | struct list_head list; |
@@ -133,11 +134,6 @@ struct nfs_inode { | |||
133 | * server. | 134 | * server. |
134 | */ | 135 | */ |
135 | unsigned long cache_change_attribute; | 136 | unsigned long cache_change_attribute; |
136 | /* | ||
137 | * Counter indicating the number of outstanding requests that | ||
138 | * will cause a file data update. | ||
139 | */ | ||
140 | atomic_t data_updates; | ||
141 | 137 | ||
142 | struct rb_root access_cache; | 138 | struct rb_root access_cache; |
143 | struct list_head access_cache_entry_lru; | 139 | struct list_head access_cache_entry_lru; |
@@ -205,27 +201,18 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) | |||
205 | #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) | 201 | #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) |
206 | #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) | 202 | #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) |
207 | #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) | 203 | #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) |
208 | #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) | ||
209 | #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) | ||
210 | #define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) | ||
211 | #define NFS_MINATTRTIMEO(inode) \ | 204 | #define NFS_MINATTRTIMEO(inode) \ |
212 | (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ | 205 | (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ |
213 | : NFS_SERVER(inode)->acregmin) | 206 | : NFS_SERVER(inode)->acregmin) |
214 | #define NFS_MAXATTRTIMEO(inode) \ | 207 | #define NFS_MAXATTRTIMEO(inode) \ |
215 | (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \ | 208 | (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \ |
216 | : NFS_SERVER(inode)->acregmax) | 209 | : NFS_SERVER(inode)->acregmax) |
217 | #define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp) | ||
218 | 210 | ||
219 | #define NFS_FLAGS(inode) (NFS_I(inode)->flags) | 211 | #define NFS_FLAGS(inode) (NFS_I(inode)->flags) |
220 | #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) | 212 | #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) |
221 | 213 | ||
222 | #define NFS_FILEID(inode) (NFS_I(inode)->fileid) | 214 | #define NFS_FILEID(inode) (NFS_I(inode)->fileid) |
223 | 215 | ||
224 | static inline int nfs_caches_unstable(struct inode *inode) | ||
225 | { | ||
226 | return atomic_read(&NFS_I(inode)->data_updates) != 0; | ||
227 | } | ||
228 | |||
229 | static inline void nfs_mark_for_revalidate(struct inode *inode) | 216 | static inline void nfs_mark_for_revalidate(struct inode *inode) |
230 | { | 217 | { |
231 | struct nfs_inode *nfsi = NFS_I(inode); | 218 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -237,12 +224,6 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) | |||
237 | spin_unlock(&inode->i_lock); | 224 | spin_unlock(&inode->i_lock); |
238 | } | 225 | } |
239 | 226 | ||
240 | static inline void NFS_CACHEINV(struct inode *inode) | ||
241 | { | ||
242 | if (!nfs_caches_unstable(inode)) | ||
243 | nfs_mark_for_revalidate(inode); | ||
244 | } | ||
245 | |||
246 | static inline int nfs_server_capable(struct inode *inode, int cap) | 227 | static inline int nfs_server_capable(struct inode *inode, int cap) |
247 | { | 228 | { |
248 | return NFS_SERVER(inode)->caps & cap; | 229 | return NFS_SERVER(inode)->caps & cap; |
@@ -253,28 +234,33 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode) | |||
253 | return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); | 234 | return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); |
254 | } | 235 | } |
255 | 236 | ||
237 | static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) | ||
238 | { | ||
239 | dentry->d_time = verf; | ||
240 | } | ||
241 | |||
256 | /** | 242 | /** |
257 | * nfs_save_change_attribute - Returns the inode attribute change cookie | 243 | * nfs_save_change_attribute - Returns the inode attribute change cookie |
258 | * @inode - pointer to inode | 244 | * @dir - pointer to parent directory inode |
259 | * The "change attribute" is updated every time we finish an operation | 245 | * The "change attribute" is updated every time we finish an operation |
260 | * that will result in a metadata change on the server. | 246 | * that will result in a metadata change on the server. |
261 | */ | 247 | */ |
262 | static inline long nfs_save_change_attribute(struct inode *inode) | 248 | static inline unsigned long nfs_save_change_attribute(struct inode *dir) |
263 | { | 249 | { |
264 | return NFS_I(inode)->cache_change_attribute; | 250 | return NFS_I(dir)->cache_change_attribute; |
265 | } | 251 | } |
266 | 252 | ||
267 | /** | 253 | /** |
268 | * nfs_verify_change_attribute - Detects NFS inode cache updates | 254 | * nfs_verify_change_attribute - Detects NFS remote directory changes |
269 | * @inode - pointer to inode | 255 | * @dir - pointer to parent directory inode |
270 | * @chattr - previously saved change attribute | 256 | * @chattr - previously saved change attribute |
271 | * Return "false" if metadata has been updated (or is in the process of | 257 | * Return "false" if the verifiers doesn't match the change attribute. |
272 | * being updated) since the change attribute was saved. | 258 | * This would usually indicate that the directory contents have changed on |
259 | * the server, and that any dentries need revalidating. | ||
273 | */ | 260 | */ |
274 | static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) | 261 | static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr) |
275 | { | 262 | { |
276 | return !nfs_caches_unstable(inode) | 263 | return chattr == NFS_I(dir)->cache_change_attribute; |
277 | && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute); | ||
278 | } | 264 | } |
279 | 265 | ||
280 | /* | 266 | /* |
@@ -283,15 +269,14 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long | |||
283 | extern int nfs_sync_mapping(struct address_space *mapping); | 269 | extern int nfs_sync_mapping(struct address_space *mapping); |
284 | extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); | 270 | extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); |
285 | extern void nfs_zap_caches(struct inode *); | 271 | extern void nfs_zap_caches(struct inode *); |
272 | extern void nfs_invalidate_atime(struct inode *); | ||
286 | extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, | 273 | extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, |
287 | struct nfs_fattr *); | 274 | struct nfs_fattr *); |
288 | extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); | 275 | extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); |
289 | extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); | 276 | extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); |
277 | extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); | ||
290 | extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 278 | extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
291 | extern int nfs_permission(struct inode *, int, struct nameidata *); | 279 | extern int nfs_permission(struct inode *, int, struct nameidata *); |
292 | extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); | ||
293 | extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); | ||
294 | extern void nfs_access_zap_cache(struct inode *inode); | ||
295 | extern int nfs_open(struct inode *, struct file *); | 280 | extern int nfs_open(struct inode *, struct file *); |
296 | extern int nfs_release(struct inode *, struct file *); | 281 | extern int nfs_release(struct inode *, struct file *); |
297 | extern int nfs_attribute_timeout(struct inode *inode); | 282 | extern int nfs_attribute_timeout(struct inode *inode); |
@@ -301,13 +286,10 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map | |||
301 | extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); | 286 | extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); |
302 | extern int nfs_setattr(struct dentry *, struct iattr *); | 287 | extern int nfs_setattr(struct dentry *, struct iattr *); |
303 | extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); | 288 | extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); |
304 | extern void nfs_begin_attr_update(struct inode *); | ||
305 | extern void nfs_end_attr_update(struct inode *); | ||
306 | extern void nfs_begin_data_update(struct inode *); | ||
307 | extern void nfs_end_data_update(struct inode *); | ||
308 | extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); | 289 | extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); |
309 | extern void put_nfs_open_context(struct nfs_open_context *ctx); | 290 | extern void put_nfs_open_context(struct nfs_open_context *ctx); |
310 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); | 291 | extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); |
292 | extern u64 nfs_compat_user_ino64(u64 fileid); | ||
311 | 293 | ||
312 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ | 294 | /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ |
313 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ | 295 | extern __be32 root_nfs_parse_addr(char *name); /*__init*/ |
@@ -328,14 +310,15 @@ extern const struct inode_operations nfs3_file_inode_operations; | |||
328 | extern const struct file_operations nfs_file_operations; | 310 | extern const struct file_operations nfs_file_operations; |
329 | extern const struct address_space_operations nfs_file_aops; | 311 | extern const struct address_space_operations nfs_file_aops; |
330 | 312 | ||
331 | static inline struct rpc_cred *nfs_file_cred(struct file *file) | 313 | static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) |
332 | { | 314 | { |
333 | if (file != NULL) { | 315 | return filp->private_data; |
334 | struct nfs_open_context *ctx; | 316 | } |
335 | 317 | ||
336 | ctx = (struct nfs_open_context*)file->private_data; | 318 | static inline struct rpc_cred *nfs_file_cred(struct file *file) |
337 | return ctx->cred; | 319 | { |
338 | } | 320 | if (file != NULL) |
321 | return nfs_file_open_context(file)->cred; | ||
339 | return NULL; | 322 | return NULL; |
340 | } | 323 | } |
341 | 324 | ||
@@ -378,6 +361,8 @@ extern const struct file_operations nfs_dir_operations; | |||
378 | extern struct dentry_operations nfs_dentry_operations; | 361 | extern struct dentry_operations nfs_dentry_operations; |
379 | 362 | ||
380 | extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); | 363 | extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); |
364 | extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); | ||
365 | extern void nfs_access_zap_cache(struct inode *inode); | ||
381 | 366 | ||
382 | /* | 367 | /* |
383 | * linux/fs/nfs/symlink.c | 368 | * linux/fs/nfs/symlink.c |
@@ -420,15 +405,14 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); | |||
420 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); | 405 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); |
421 | extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | 406 | extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); |
422 | extern void nfs_writedata_release(void *); | 407 | extern void nfs_writedata_release(void *); |
423 | extern int nfs_set_page_dirty(struct page *); | ||
424 | 408 | ||
425 | /* | 409 | /* |
426 | * Try to write back everything synchronously (but check the | 410 | * Try to write back everything synchronously (but check the |
427 | * return value!) | 411 | * return value!) |
428 | */ | 412 | */ |
429 | extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); | 413 | extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); |
430 | extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); | ||
431 | extern int nfs_wb_all(struct inode *inode); | 414 | extern int nfs_wb_all(struct inode *inode); |
415 | extern int nfs_wb_nocommit(struct inode *inode); | ||
432 | extern int nfs_wb_page(struct inode *inode, struct page* page); | 416 | extern int nfs_wb_page(struct inode *inode, struct page* page); |
433 | extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); | 417 | extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); |
434 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | 418 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); |
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 78e60798d10e..30dbcc185e69 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h | |||
@@ -30,7 +30,6 @@ | |||
30 | #define PG_BUSY 0 | 30 | #define PG_BUSY 0 |
31 | #define PG_NEED_COMMIT 1 | 31 | #define PG_NEED_COMMIT 1 |
32 | #define PG_NEED_RESCHED 2 | 32 | #define PG_NEED_RESCHED 2 |
33 | #define PG_NEED_FLUSH 3 | ||
34 | 33 | ||
35 | struct nfs_inode; | 34 | struct nfs_inode; |
36 | struct nfs_page { | 35 | struct nfs_page { |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cf74a4db84a5..daab252f2e5c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -62,7 +62,8 @@ struct nfs_fattr { | |||
62 | #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ | 62 | #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ |
63 | #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ | 63 | #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ |
64 | #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ | 64 | #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ |
65 | #define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */ | 65 | #define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ |
66 | #define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ | ||
66 | 67 | ||
67 | /* | 68 | /* |
68 | * Info on the file system | 69 | * Info on the file system |
@@ -538,10 +539,13 @@ typedef u64 clientid4; | |||
538 | 539 | ||
539 | struct nfs4_accessargs { | 540 | struct nfs4_accessargs { |
540 | const struct nfs_fh * fh; | 541 | const struct nfs_fh * fh; |
542 | const u32 * bitmask; | ||
541 | u32 access; | 543 | u32 access; |
542 | }; | 544 | }; |
543 | 545 | ||
544 | struct nfs4_accessres { | 546 | struct nfs4_accessres { |
547 | const struct nfs_server * server; | ||
548 | struct nfs_fattr * fattr; | ||
545 | u32 supported; | 549 | u32 supported; |
546 | u32 access; | 550 | u32 access; |
547 | }; | 551 | }; |
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c0d9d14983b3..d9d5c5ad826c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
@@ -117,7 +117,7 @@ struct rpc_create_args { | |||
117 | 117 | ||
118 | struct rpc_clnt *rpc_create(struct rpc_create_args *args); | 118 | struct rpc_clnt *rpc_create(struct rpc_create_args *args); |
119 | struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, | 119 | struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, |
120 | struct rpc_program *, int); | 120 | struct rpc_program *, u32); |
121 | struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); | 121 | struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); |
122 | void rpc_shutdown_client(struct rpc_clnt *); | 122 | void rpc_shutdown_client(struct rpc_clnt *); |
123 | void rpc_release_client(struct rpc_clnt *); | 123 | void rpc_release_client(struct rpc_clnt *); |
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361e..3347c72b848a 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h | |||
@@ -88,6 +88,11 @@ enum { | |||
88 | CTL_SLOTTABLE_TCP, | 88 | CTL_SLOTTABLE_TCP, |
89 | CTL_MIN_RESVPORT, | 89 | CTL_MIN_RESVPORT, |
90 | CTL_MAX_RESVPORT, | 90 | CTL_MAX_RESVPORT, |
91 | CTL_SLOTTABLE_RDMA, | ||
92 | CTL_RDMA_MAXINLINEREAD, | ||
93 | CTL_RDMA_MAXINLINEWRITE, | ||
94 | CTL_RDMA_WRITEPADDING, | ||
95 | CTL_RDMA_MEMREG, | ||
91 | }; | 96 | }; |
92 | 97 | ||
93 | #endif /* _LINUX_SUNRPC_DEBUG_H_ */ | 98 | #endif /* _LINUX_SUNRPC_DEBUG_H_ */ |
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 784d4c3ef651..c4beb5775111 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
@@ -138,6 +138,19 @@ typedef __be32 rpc_fraghdr; | |||
138 | #define RPC_MAX_HEADER_WITH_AUTH \ | 138 | #define RPC_MAX_HEADER_WITH_AUTH \ |
139 | (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) | 139 | (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) |
140 | 140 | ||
141 | /* | ||
142 | * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. | ||
143 | */ | ||
144 | #define RPCBIND_NETID_UDP "udp" | ||
145 | #define RPCBIND_NETID_TCP "tcp" | ||
146 | #define RPCBIND_NETID_UDP6 "udp6" | ||
147 | #define RPCBIND_NETID_TCP6 "tcp6" | ||
148 | |||
149 | /* | ||
150 | * Note that RFC 1833 does not put any size restrictions on the | ||
151 | * netid string, but all currently defined netid's fit in 4 bytes. | ||
152 | */ | ||
153 | #define RPCBIND_MAXNETIDLEN (4u) | ||
141 | 154 | ||
142 | #endif /* __KERNEL__ */ | 155 | #endif /* __KERNEL__ */ |
143 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ | 156 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ |
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h new file mode 100644 index 000000000000..0013a0d8dc6b --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma.h | |||
@@ -0,0 +1,116 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #ifndef _LINUX_SUNRPC_RPC_RDMA_H | ||
41 | #define _LINUX_SUNRPC_RPC_RDMA_H | ||
42 | |||
43 | struct rpcrdma_segment { | ||
44 | uint32_t rs_handle; /* Registered memory handle */ | ||
45 | uint32_t rs_length; /* Length of the chunk in bytes */ | ||
46 | uint64_t rs_offset; /* Chunk virtual address or offset */ | ||
47 | }; | ||
48 | |||
49 | /* | ||
50 | * read chunk(s), encoded as a linked list. | ||
51 | */ | ||
52 | struct rpcrdma_read_chunk { | ||
53 | uint32_t rc_discrim; /* 1 indicates presence */ | ||
54 | uint32_t rc_position; /* Position in XDR stream */ | ||
55 | struct rpcrdma_segment rc_target; | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * write chunk, and reply chunk. | ||
60 | */ | ||
61 | struct rpcrdma_write_chunk { | ||
62 | struct rpcrdma_segment wc_target; | ||
63 | }; | ||
64 | |||
65 | /* | ||
66 | * write chunk(s), encoded as a counted array. | ||
67 | */ | ||
68 | struct rpcrdma_write_array { | ||
69 | uint32_t wc_discrim; /* 1 indicates presence */ | ||
70 | uint32_t wc_nchunks; /* Array count */ | ||
71 | struct rpcrdma_write_chunk wc_array[0]; | ||
72 | }; | ||
73 | |||
74 | struct rpcrdma_msg { | ||
75 | uint32_t rm_xid; /* Mirrors the RPC header xid */ | ||
76 | uint32_t rm_vers; /* Version of this protocol */ | ||
77 | uint32_t rm_credit; /* Buffers requested/granted */ | ||
78 | uint32_t rm_type; /* Type of message (enum rpcrdma_proc) */ | ||
79 | union { | ||
80 | |||
81 | struct { /* no chunks */ | ||
82 | uint32_t rm_empty[3]; /* 3 empty chunk lists */ | ||
83 | } rm_nochunks; | ||
84 | |||
85 | struct { /* no chunks and padded */ | ||
86 | uint32_t rm_align; /* Padding alignment */ | ||
87 | uint32_t rm_thresh; /* Padding threshold */ | ||
88 | uint32_t rm_pempty[3]; /* 3 empty chunk lists */ | ||
89 | } rm_padded; | ||
90 | |||
91 | uint32_t rm_chunks[0]; /* read, write and reply chunks */ | ||
92 | |||
93 | } rm_body; | ||
94 | }; | ||
95 | |||
96 | #define RPCRDMA_HDRLEN_MIN 28 | ||
97 | |||
98 | enum rpcrdma_errcode { | ||
99 | ERR_VERS = 1, | ||
100 | ERR_CHUNK = 2 | ||
101 | }; | ||
102 | |||
103 | struct rpcrdma_err_vers { | ||
104 | uint32_t rdma_vers_low; /* Version range supported by peer */ | ||
105 | uint32_t rdma_vers_high; | ||
106 | }; | ||
107 | |||
108 | enum rpcrdma_proc { | ||
109 | RDMA_MSG = 0, /* An RPC call or reply msg */ | ||
110 | RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ | ||
111 | RDMA_MSGP = 2, /* An RPC call or reply msg with padding */ | ||
112 | RDMA_DONE = 3, /* Client signals reply completion */ | ||
113 | RDMA_ERROR = 4 /* An RPC RDMA encoding error */ | ||
114 | }; | ||
115 | |||
116 | #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ | ||
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index c6b53d181bfa..0751c9464d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
@@ -70,7 +70,10 @@ struct xdr_buf { | |||
70 | 70 | ||
71 | struct page ** pages; /* Array of contiguous pages */ | 71 | struct page ** pages; /* Array of contiguous pages */ |
72 | unsigned int page_base, /* Start of page data */ | 72 | unsigned int page_base, /* Start of page data */ |
73 | page_len; /* Length of page data */ | 73 | page_len, /* Length of page data */ |
74 | flags; /* Flags for data disposition */ | ||
75 | #define XDRBUF_READ 0x01 /* target of file read */ | ||
76 | #define XDRBUF_WRITE 0x02 /* source of file write */ | ||
74 | 77 | ||
75 | unsigned int buflen, /* Total length of storage buffer */ | 78 | unsigned int buflen, /* Total length of storage buffer */ |
76 | len; /* Length of XDR encoded message */ | 79 | len; /* Length of XDR encoded message */ |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d11cedd14f0f..30b17b3bc1a9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -19,25 +19,11 @@ | |||
19 | 19 | ||
20 | #ifdef __KERNEL__ | 20 | #ifdef __KERNEL__ |
21 | 21 | ||
22 | extern unsigned int xprt_udp_slot_table_entries; | ||
23 | extern unsigned int xprt_tcp_slot_table_entries; | ||
24 | |||
25 | #define RPC_MIN_SLOT_TABLE (2U) | 22 | #define RPC_MIN_SLOT_TABLE (2U) |
26 | #define RPC_DEF_SLOT_TABLE (16U) | 23 | #define RPC_DEF_SLOT_TABLE (16U) |
27 | #define RPC_MAX_SLOT_TABLE (128U) | 24 | #define RPC_MAX_SLOT_TABLE (128U) |
28 | 25 | ||
29 | /* | 26 | /* |
30 | * Parameters for choosing a free port | ||
31 | */ | ||
32 | extern unsigned int xprt_min_resvport; | ||
33 | extern unsigned int xprt_max_resvport; | ||
34 | |||
35 | #define RPC_MIN_RESVPORT (1U) | ||
36 | #define RPC_MAX_RESVPORT (65535U) | ||
37 | #define RPC_DEF_MIN_RESVPORT (665U) | ||
38 | #define RPC_DEF_MAX_RESVPORT (1023U) | ||
39 | |||
40 | /* | ||
41 | * This describes a timeout strategy | 27 | * This describes a timeout strategy |
42 | */ | 28 | */ |
43 | struct rpc_timeout { | 29 | struct rpc_timeout { |
@@ -53,6 +39,10 @@ enum rpc_display_format_t { | |||
53 | RPC_DISPLAY_PORT, | 39 | RPC_DISPLAY_PORT, |
54 | RPC_DISPLAY_PROTO, | 40 | RPC_DISPLAY_PROTO, |
55 | RPC_DISPLAY_ALL, | 41 | RPC_DISPLAY_ALL, |
42 | RPC_DISPLAY_HEX_ADDR, | ||
43 | RPC_DISPLAY_HEX_PORT, | ||
44 | RPC_DISPLAY_UNIVERSAL_ADDR, | ||
45 | RPC_DISPLAY_NETID, | ||
56 | RPC_DISPLAY_MAX, | 46 | RPC_DISPLAY_MAX, |
57 | }; | 47 | }; |
58 | 48 | ||
@@ -196,14 +186,22 @@ struct rpc_xprt { | |||
196 | char * address_strings[RPC_DISPLAY_MAX]; | 186 | char * address_strings[RPC_DISPLAY_MAX]; |
197 | }; | 187 | }; |
198 | 188 | ||
199 | struct rpc_xprtsock_create { | 189 | struct xprt_create { |
200 | int proto; /* IPPROTO_UDP or IPPROTO_TCP */ | 190 | int ident; /* XPRT_TRANSPORT identifier */ |
201 | struct sockaddr * srcaddr; /* optional local address */ | 191 | struct sockaddr * srcaddr; /* optional local address */ |
202 | struct sockaddr * dstaddr; /* remote peer address */ | 192 | struct sockaddr * dstaddr; /* remote peer address */ |
203 | size_t addrlen; | 193 | size_t addrlen; |
204 | struct rpc_timeout * timeout; /* optional timeout parameters */ | 194 | struct rpc_timeout * timeout; /* optional timeout parameters */ |
205 | }; | 195 | }; |
206 | 196 | ||
197 | struct xprt_class { | ||
198 | struct list_head list; | ||
199 | int ident; /* XPRT_TRANSPORT identifier */ | ||
200 | struct rpc_xprt * (*setup)(struct xprt_create *); | ||
201 | struct module *owner; | ||
202 | char name[32]; | ||
203 | }; | ||
204 | |||
207 | /* | 205 | /* |
208 | * Transport operations used by ULPs | 206 | * Transport operations used by ULPs |
209 | */ | 207 | */ |
@@ -212,7 +210,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long | |||
212 | /* | 210 | /* |
213 | * Generic internal transport functions | 211 | * Generic internal transport functions |
214 | */ | 212 | */ |
215 | struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); | 213 | struct rpc_xprt *xprt_create_transport(struct xprt_create *args); |
216 | void xprt_connect(struct rpc_task *task); | 214 | void xprt_connect(struct rpc_task *task); |
217 | void xprt_reserve(struct rpc_task *task); | 215 | void xprt_reserve(struct rpc_task *task); |
218 | int xprt_reserve_xprt(struct rpc_task *task); | 216 | int xprt_reserve_xprt(struct rpc_task *task); |
@@ -235,6 +233,8 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * | |||
235 | /* | 233 | /* |
236 | * Transport switch helper functions | 234 | * Transport switch helper functions |
237 | */ | 235 | */ |
236 | int xprt_register_transport(struct xprt_class *type); | ||
237 | int xprt_unregister_transport(struct xprt_class *type); | ||
238 | void xprt_set_retrans_timeout_def(struct rpc_task *task); | 238 | void xprt_set_retrans_timeout_def(struct rpc_task *task); |
239 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task); | 239 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task); |
240 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); | 240 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); |
@@ -248,14 +248,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); | |||
248 | void xprt_disconnect(struct rpc_xprt *xprt); | 248 | void xprt_disconnect(struct rpc_xprt *xprt); |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Socket transport setup operations | ||
252 | */ | ||
253 | struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); | ||
254 | struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); | ||
255 | int init_socket_xprt(void); | ||
256 | void cleanup_socket_xprt(void); | ||
257 | |||
258 | /* | ||
259 | * Reserved bit positions in xprt->state | 251 | * Reserved bit positions in xprt->state |
260 | */ | 252 | */ |
261 | #define XPRT_LOCKED (0) | 253 | #define XPRT_LOCKED (0) |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h new file mode 100644 index 000000000000..4de56b1d372b --- /dev/null +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #ifndef _LINUX_SUNRPC_XPRTRDMA_H | ||
41 | #define _LINUX_SUNRPC_XPRTRDMA_H | ||
42 | |||
43 | /* | ||
44 | * RPC transport identifier for RDMA | ||
45 | */ | ||
46 | #define XPRT_TRANSPORT_RDMA 256 | ||
47 | |||
48 | /* | ||
49 | * rpcbind (v3+) RDMA netid. | ||
50 | */ | ||
51 | #define RPCBIND_NETID_RDMA "rdma" | ||
52 | |||
53 | /* | ||
54 | * Constants. Max RPC/NFS header is big enough to account for | ||
55 | * additional marshaling buffers passed down by Linux client. | ||
56 | * | ||
57 | * RDMA header is currently fixed max size, and is big enough for a | ||
58 | * fully-chunked NFS message (read chunks are the largest). Note only | ||
59 | * a single chunk type per message is supported currently. | ||
60 | */ | ||
61 | #define RPCRDMA_MIN_SLOT_TABLE (2U) | ||
62 | #define RPCRDMA_DEF_SLOT_TABLE (32U) | ||
63 | #define RPCRDMA_MAX_SLOT_TABLE (256U) | ||
64 | |||
65 | #define RPCRDMA_DEF_INLINE (1024) /* default inline max */ | ||
66 | |||
67 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ | ||
68 | |||
69 | #define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ | ||
70 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
71 | |||
72 | /* memory registration strategies */ | ||
73 | #define RPCRDMA_PERSISTENT_REGISTRATION (1) | ||
74 | |||
75 | enum rpcrdma_memreg { | ||
76 | RPCRDMA_BOUNCEBUFFERS = 0, | ||
77 | RPCRDMA_REGISTER, | ||
78 | RPCRDMA_MEMWINDOWS, | ||
79 | RPCRDMA_MEMWINDOWS_ASYNC, | ||
80 | RPCRDMA_MTHCAFMR, | ||
81 | RPCRDMA_ALLPHYSICAL, | ||
82 | RPCRDMA_LAST | ||
83 | }; | ||
84 | |||
85 | #endif /* _LINUX_SUNRPC_XPRTRDMA_H */ | ||
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h new file mode 100644 index 000000000000..2c6c2c2783d8 --- /dev/null +++ b/include/linux/sunrpc/xprtsock.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * linux/include/linux/sunrpc/xprtsock.h | ||
3 | * | ||
4 | * Declarations for the RPC transport socket provider. | ||
5 | */ | ||
6 | |||
7 | #ifndef _LINUX_SUNRPC_XPRTSOCK_H | ||
8 | #define _LINUX_SUNRPC_XPRTSOCK_H | ||
9 | |||
10 | #ifdef __KERNEL__ | ||
11 | |||
12 | /* | ||
13 | * Socket transport setup operations | ||
14 | */ | ||
15 | struct rpc_xprt *xs_setup_udp(struct xprt_create *args); | ||
16 | struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); | ||
17 | |||
18 | int init_socket_xprt(void); | ||
19 | void cleanup_socket_xprt(void); | ||
20 | |||
21 | /* | ||
22 | * RPC transport identifiers for UDP, TCP | ||
23 | * | ||
24 | * To preserve compatibility with the historical use of raw IP protocol | ||
25 | * id's for transport selection, these are specified with the previous | ||
26 | * values. No such restriction exists for new transports, except that | ||
27 | * they may not collide with these values (17 and 6, respectively). | ||
28 | */ | ||
29 | #define XPRT_TRANSPORT_UDP IPPROTO_UDP | ||
30 | #define XPRT_TRANSPORT_TCP IPPROTO_TCP | ||
31 | |||
32 | /* | ||
33 | * RPC slot table sizes for UDP, TCP transports | ||
34 | */ | ||
35 | extern unsigned int xprt_udp_slot_table_entries; | ||
36 | extern unsigned int xprt_tcp_slot_table_entries; | ||
37 | |||
38 | /* | ||
39 | * Parameters for choosing a free port | ||
40 | */ | ||
41 | extern unsigned int xprt_min_resvport; | ||
42 | extern unsigned int xprt_max_resvport; | ||
43 | |||
44 | #define RPC_MIN_RESVPORT (1U) | ||
45 | #define RPC_MAX_RESVPORT (65535U) | ||
46 | #define RPC_DEF_MIN_RESVPORT (665U) | ||
47 | #define RPC_DEF_MAX_RESVPORT (1023U) | ||
48 | |||
49 | #endif /* __KERNEL__ */ | ||
50 | |||
51 | #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ | ||
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c7c3337c3a88..d1321a81c9c4 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -62,8 +62,6 @@ struct writeback_control { | |||
62 | unsigned for_reclaim:1; /* Invoked from the page allocator */ | 62 | unsigned for_reclaim:1; /* Invoked from the page allocator */ |
63 | unsigned for_writepages:1; /* This is a writepages() call */ | 63 | unsigned for_writepages:1; /* This is a writepages() call */ |
64 | unsigned range_cyclic:1; /* range_start is cyclic */ | 64 | unsigned range_cyclic:1; /* range_start is cyclic */ |
65 | |||
66 | void *fs_private; /* For use by ->writepages() */ | ||
67 | }; | 65 | }; |
68 | 66 | ||
69 | /* | 67 | /* |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 04f3ffb8d9d4..0ae703c157ba 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -1525,6 +1525,7 @@ add_names: | |||
1525 | context->names[idx].ino = (unsigned long)-1; | 1525 | context->names[idx].ino = (unsigned long)-1; |
1526 | } | 1526 | } |
1527 | } | 1527 | } |
1528 | EXPORT_SYMBOL_GPL(__audit_inode_child); | ||
1528 | 1529 | ||
1529 | /** | 1530 | /** |
1530 | * auditsc_get_stamp - get local copies of audit_context values | 1531 | * auditsc_get_stamp - get local copies of audit_context values |
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8ebfc4db7f51..5c69a725e530 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | obj-$(CONFIG_SUNRPC) += sunrpc.o | 6 | obj-$(CONFIG_SUNRPC) += sunrpc.o |
7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | 7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ |
8 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ | ||
8 | 9 | ||
9 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | 10 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ |
10 | auth.o auth_null.o auth_unix.o \ | 11 | auth.o auth_null.o auth_unix.o \ |
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 42b3220bed39..8bd074df27d3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c | |||
@@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) | |||
42 | { | 42 | { |
43 | u8 *ptr; | 43 | u8 *ptr; |
44 | u8 pad; | 44 | u8 pad; |
45 | int len = buf->len; | 45 | size_t len = buf->len; |
46 | 46 | ||
47 | if (len <= buf->head[0].iov_len) { | 47 | if (len <= buf->head[0].iov_len) { |
48 | pad = *(u8 *)(buf->head[0].iov_base + len - 1); | 48 | pad = *(u8 *)(buf->head[0].iov_base + len - 1); |
@@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) | |||
53 | } else | 53 | } else |
54 | len -= buf->head[0].iov_len; | 54 | len -= buf->head[0].iov_len; |
55 | if (len <= buf->page_len) { | 55 | if (len <= buf->page_len) { |
56 | int last = (buf->page_base + len - 1) | 56 | unsigned int last = (buf->page_base + len - 1) |
57 | >>PAGE_CACHE_SHIFT; | 57 | >>PAGE_CACHE_SHIFT; |
58 | int offset = (buf->page_base + len - 1) | 58 | unsigned int offset = (buf->page_base + len - 1) |
59 | & (PAGE_CACHE_SIZE - 1); | 59 | & (PAGE_CACHE_SIZE - 1); |
60 | ptr = kmap_atomic(buf->pages[last], KM_USER0); | 60 | ptr = kmap_atomic(buf->pages[last], KM_USER0); |
61 | pad = *(ptr + offset); | 61 | pad = *(ptr + offset); |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 52429b1ffcc1..76be83ee4b04 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s | |||
127 | struct rpc_clnt *clnt = NULL; | 127 | struct rpc_clnt *clnt = NULL; |
128 | struct rpc_auth *auth; | 128 | struct rpc_auth *auth; |
129 | int err; | 129 | int err; |
130 | int len; | 130 | size_t len; |
131 | |||
132 | /* sanity check the name before trying to print it */ | ||
133 | err = -EINVAL; | ||
134 | len = strlen(servname); | ||
135 | if (len > RPC_MAXNETNAMELEN) | ||
136 | goto out_no_rpciod; | ||
137 | len++; | ||
131 | 138 | ||
132 | dprintk("RPC: creating %s client for %s (xprt %p)\n", | 139 | dprintk("RPC: creating %s client for %s (xprt %p)\n", |
133 | program->name, servname, xprt); | 140 | program->name, servname, xprt); |
@@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s | |||
148 | clnt->cl_parent = clnt; | 155 | clnt->cl_parent = clnt; |
149 | 156 | ||
150 | clnt->cl_server = clnt->cl_inline_name; | 157 | clnt->cl_server = clnt->cl_inline_name; |
151 | len = strlen(servname) + 1; | ||
152 | if (len > sizeof(clnt->cl_inline_name)) { | 158 | if (len > sizeof(clnt->cl_inline_name)) { |
153 | char *buf = kmalloc(len, GFP_KERNEL); | 159 | char *buf = kmalloc(len, GFP_KERNEL); |
154 | if (buf != 0) | 160 | if (buf != 0) |
@@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) | |||
234 | { | 240 | { |
235 | struct rpc_xprt *xprt; | 241 | struct rpc_xprt *xprt; |
236 | struct rpc_clnt *clnt; | 242 | struct rpc_clnt *clnt; |
237 | struct rpc_xprtsock_create xprtargs = { | 243 | struct xprt_create xprtargs = { |
238 | .proto = args->protocol, | 244 | .ident = args->protocol, |
239 | .srcaddr = args->saddress, | 245 | .srcaddr = args->saddress, |
240 | .dstaddr = args->address, | 246 | .dstaddr = args->address, |
241 | .addrlen = args->addrsize, | 247 | .addrlen = args->addrsize, |
@@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) | |||
253 | */ | 259 | */ |
254 | if (args->servername == NULL) { | 260 | if (args->servername == NULL) { |
255 | struct sockaddr_in *addr = | 261 | struct sockaddr_in *addr = |
256 | (struct sockaddr_in *) &args->address; | 262 | (struct sockaddr_in *) args->address; |
257 | snprintf(servername, sizeof(servername), NIPQUAD_FMT, | 263 | snprintf(servername, sizeof(servername), NIPQUAD_FMT, |
258 | NIPQUAD(addr->sin_addr.s_addr)); | 264 | NIPQUAD(addr->sin_addr.s_addr)); |
259 | args->servername = servername; | 265 | args->servername = servername; |
@@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) | |||
269 | if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) | 275 | if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) |
270 | xprt->resvport = 0; | 276 | xprt->resvport = 0; |
271 | 277 | ||
272 | dprintk("RPC: creating %s client for %s (xprt %p)\n", | ||
273 | args->program->name, args->servername, xprt); | ||
274 | |||
275 | clnt = rpc_new_client(xprt, args->servername, args->program, | 278 | clnt = rpc_new_client(xprt, args->servername, args->program, |
276 | args->version, args->authflavor); | 279 | args->version, args->authflavor); |
277 | if (IS_ERR(clnt)) | 280 | if (IS_ERR(clnt)) |
@@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt) | |||
439 | */ | 442 | */ |
440 | struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, | 443 | struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, |
441 | struct rpc_program *program, | 444 | struct rpc_program *program, |
442 | int vers) | 445 | u32 vers) |
443 | { | 446 | { |
444 | struct rpc_clnt *clnt; | 447 | struct rpc_clnt *clnt; |
445 | struct rpc_version *version; | 448 | struct rpc_version *version; |
@@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task) | |||
843 | dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); | 846 | dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); |
844 | 847 | ||
845 | if (RPC_IS_ASYNC(task) || !signalled()) { | 848 | if (RPC_IS_ASYNC(task) || !signalled()) { |
846 | xprt_release(task); | 849 | task->tk_action = call_allocate; |
847 | task->tk_action = call_reserve; | ||
848 | rpc_delay(task, HZ>>4); | 850 | rpc_delay(task, HZ>>4); |
849 | return; | 851 | return; |
850 | } | 852 | } |
@@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) | |||
871 | buf->head[0].iov_len = len; | 873 | buf->head[0].iov_len = len; |
872 | buf->tail[0].iov_len = 0; | 874 | buf->tail[0].iov_len = 0; |
873 | buf->page_len = 0; | 875 | buf->page_len = 0; |
876 | buf->flags = 0; | ||
874 | buf->len = 0; | 877 | buf->len = 0; |
875 | buf->buflen = len; | 878 | buf->buflen = len; |
876 | } | 879 | } |
@@ -937,7 +940,7 @@ call_bind(struct rpc_task *task) | |||
937 | static void | 940 | static void |
938 | call_bind_status(struct rpc_task *task) | 941 | call_bind_status(struct rpc_task *task) |
939 | { | 942 | { |
940 | int status = -EACCES; | 943 | int status = -EIO; |
941 | 944 | ||
942 | if (task->tk_status >= 0) { | 945 | if (task->tk_status >= 0) { |
943 | dprint_status(task); | 946 | dprint_status(task); |
@@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task) | |||
947 | } | 950 | } |
948 | 951 | ||
949 | switch (task->tk_status) { | 952 | switch (task->tk_status) { |
953 | case -EAGAIN: | ||
954 | dprintk("RPC: %5u rpcbind waiting for another request " | ||
955 | "to finish\n", task->tk_pid); | ||
956 | /* avoid busy-waiting here -- could be a network outage. */ | ||
957 | rpc_delay(task, 5*HZ); | ||
958 | goto retry_timeout; | ||
950 | case -EACCES: | 959 | case -EACCES: |
951 | dprintk("RPC: %5u remote rpcbind: RPC program/version " | 960 | dprintk("RPC: %5u remote rpcbind: RPC program/version " |
952 | "unavailable\n", task->tk_pid); | 961 | "unavailable\n", task->tk_pid); |
962 | /* fail immediately if this is an RPC ping */ | ||
963 | if (task->tk_msg.rpc_proc->p_proc == 0) { | ||
964 | status = -EOPNOTSUPP; | ||
965 | break; | ||
966 | } | ||
953 | rpc_delay(task, 3*HZ); | 967 | rpc_delay(task, 3*HZ); |
954 | goto retry_timeout; | 968 | goto retry_timeout; |
955 | case -ETIMEDOUT: | 969 | case -ETIMEDOUT: |
@@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task) | |||
957 | task->tk_pid); | 971 | task->tk_pid); |
958 | goto retry_timeout; | 972 | goto retry_timeout; |
959 | case -EPFNOSUPPORT: | 973 | case -EPFNOSUPPORT: |
974 | /* server doesn't support any rpcbind version we know of */ | ||
960 | dprintk("RPC: %5u remote rpcbind service unavailable\n", | 975 | dprintk("RPC: %5u remote rpcbind service unavailable\n", |
961 | task->tk_pid); | 976 | task->tk_pid); |
962 | break; | 977 | break; |
@@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task) | |||
969 | default: | 984 | default: |
970 | dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", | 985 | dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", |
971 | task->tk_pid, -task->tk_status); | 986 | task->tk_pid, -task->tk_status); |
972 | status = -EIO; | ||
973 | } | 987 | } |
974 | 988 | ||
975 | rpc_exit(task, status); | 989 | rpc_exit(task, status); |
@@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task) | |||
1257 | { | 1271 | { |
1258 | dprint_status(task); | 1272 | dprint_status(task); |
1259 | 1273 | ||
1260 | xprt_release(task); /* Must do to obtain new XID */ | ||
1261 | task->tk_action = call_refreshresult; | 1274 | task->tk_action = call_refreshresult; |
1262 | task->tk_status = 0; | 1275 | task->tk_status = 0; |
1263 | task->tk_client->cl_stats->rpcauthrefresh++; | 1276 | task->tk_client->cl_stats->rpcauthrefresh++; |
@@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task) | |||
1375 | dprintk("RPC: %5u %s: retry stale creds\n", | 1388 | dprintk("RPC: %5u %s: retry stale creds\n", |
1376 | task->tk_pid, __FUNCTION__); | 1389 | task->tk_pid, __FUNCTION__); |
1377 | rpcauth_invalcred(task); | 1390 | rpcauth_invalcred(task); |
1391 | /* Ensure we obtain a new XID! */ | ||
1392 | xprt_release(task); | ||
1378 | task->tk_action = call_refresh; | 1393 | task->tk_action = call_refresh; |
1379 | goto out_retry; | 1394 | goto out_retry; |
1380 | case RPC_AUTH_BADCRED: | 1395 | case RPC_AUTH_BADCRED: |
@@ -1523,13 +1538,18 @@ void rpc_show_tasks(void) | |||
1523 | spin_lock(&clnt->cl_lock); | 1538 | spin_lock(&clnt->cl_lock); |
1524 | list_for_each_entry(t, &clnt->cl_tasks, tk_task) { | 1539 | list_for_each_entry(t, &clnt->cl_tasks, tk_task) { |
1525 | const char *rpc_waitq = "none"; | 1540 | const char *rpc_waitq = "none"; |
1541 | int proc; | ||
1542 | |||
1543 | if (t->tk_msg.rpc_proc) | ||
1544 | proc = t->tk_msg.rpc_proc->p_proc; | ||
1545 | else | ||
1546 | proc = -1; | ||
1526 | 1547 | ||
1527 | if (RPC_IS_QUEUED(t)) | 1548 | if (RPC_IS_QUEUED(t)) |
1528 | rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); | 1549 | rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); |
1529 | 1550 | ||
1530 | printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", | 1551 | printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", |
1531 | t->tk_pid, | 1552 | t->tk_pid, proc, |
1532 | (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), | ||
1533 | t->tk_flags, t->tk_status, | 1553 | t->tk_flags, t->tk_status, |
1534 | t->tk_client, | 1554 | t->tk_client, |
1535 | (t->tk_client ? t->tk_client->cl_prog : 0), | 1555 | (t->tk_client ? t->tk_client->cl_prog : 0), |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 669e12a4ed18..c8433e8865aa 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
17 | #include <linux/dnotify.h> | 17 | #include <linux/fsnotify.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | 19 | ||
20 | #include <asm/ioctls.h> | 20 | #include <asm/ioctls.h> |
@@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v) | |||
329 | clnt->cl_prog, clnt->cl_vers); | 329 | clnt->cl_prog, clnt->cl_vers); |
330 | seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); | 330 | seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); |
331 | seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); | 331 | seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); |
332 | seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); | ||
332 | return 0; | 333 | return 0; |
333 | } | 334 | } |
334 | 335 | ||
@@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent, | |||
585 | if (S_ISDIR(mode)) | 586 | if (S_ISDIR(mode)) |
586 | inc_nlink(dir); | 587 | inc_nlink(dir); |
587 | d_add(dentry, inode); | 588 | d_add(dentry, inode); |
589 | fsnotify_create(dir, dentry); | ||
588 | } | 590 | } |
589 | mutex_unlock(&dir->i_mutex); | 591 | mutex_unlock(&dir->i_mutex); |
590 | return 0; | 592 | return 0; |
@@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) | |||
606 | inode->i_ino = iunique(dir->i_sb, 100); | 608 | inode->i_ino = iunique(dir->i_sb, 100); |
607 | d_instantiate(dentry, inode); | 609 | d_instantiate(dentry, inode); |
608 | inc_nlink(dir); | 610 | inc_nlink(dir); |
609 | inode_dir_notify(dir, DN_CREATE); | 611 | fsnotify_mkdir(dir, dentry); |
610 | return 0; | 612 | return 0; |
611 | out_err: | 613 | out_err: |
612 | printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", | 614 | printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", |
@@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi | |||
748 | rpci->flags = flags; | 750 | rpci->flags = flags; |
749 | rpci->ops = ops; | 751 | rpci->ops = ops; |
750 | rpci->nkern_readwriters = 1; | 752 | rpci->nkern_readwriters = 1; |
751 | inode_dir_notify(dir, DN_CREATE); | 753 | fsnotify_create(dir, dentry); |
752 | dget(dentry); | 754 | dget(dentry); |
753 | out: | 755 | out: |
754 | mutex_unlock(&dir->i_mutex); | 756 | mutex_unlock(&dir->i_mutex); |
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d1740dbab991..a05493aedb68 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -16,11 +16,14 @@ | |||
16 | 16 | ||
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/socket.h> | 18 | #include <linux/socket.h> |
19 | #include <linux/in.h> | ||
20 | #include <linux/in6.h> | ||
19 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
20 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
21 | 23 | ||
22 | #include <linux/sunrpc/clnt.h> | 24 | #include <linux/sunrpc/clnt.h> |
23 | #include <linux/sunrpc/sched.h> | 25 | #include <linux/sunrpc/sched.h> |
26 | #include <linux/sunrpc/xprtsock.h> | ||
24 | 27 | ||
25 | #ifdef RPC_DEBUG | 28 | #ifdef RPC_DEBUG |
26 | # define RPCDBG_FACILITY RPCDBG_BIND | 29 | # define RPCDBG_FACILITY RPCDBG_BIND |
@@ -91,26 +94,6 @@ enum { | |||
91 | #define RPCB_MAXADDRLEN (128u) | 94 | #define RPCB_MAXADDRLEN (128u) |
92 | 95 | ||
93 | /* | 96 | /* |
94 | * r_netid | ||
95 | * | ||
96 | * Quoting RFC 3530, section 2.2: | ||
97 | * | ||
98 | * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP | ||
99 | * over IPv4 the value of r_netid is the string "udp". | ||
100 | * | ||
101 | * ... | ||
102 | * | ||
103 | * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP | ||
104 | * over IPv6 the value of r_netid is the string "udp6". | ||
105 | */ | ||
106 | #define RPCB_NETID_UDP "\165\144\160" /* "udp" */ | ||
107 | #define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ | ||
108 | #define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ | ||
109 | #define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ | ||
110 | |||
111 | #define RPCB_MAXNETIDLEN (4u) | ||
112 | |||
113 | /* | ||
114 | * r_owner | 97 | * r_owner |
115 | * | 98 | * |
116 | * The "owner" is allowed to unset a service in the rpcbind database. | 99 | * The "owner" is allowed to unset a service in the rpcbind database. |
@@ -120,7 +103,7 @@ enum { | |||
120 | #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) | 103 | #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) |
121 | 104 | ||
122 | static void rpcb_getport_done(struct rpc_task *, void *); | 105 | static void rpcb_getport_done(struct rpc_task *, void *); |
123 | extern struct rpc_program rpcb_program; | 106 | static struct rpc_program rpcb_program; |
124 | 107 | ||
125 | struct rpcbind_args { | 108 | struct rpcbind_args { |
126 | struct rpc_xprt * r_xprt; | 109 | struct rpc_xprt * r_xprt; |
@@ -137,10 +120,13 @@ struct rpcbind_args { | |||
137 | static struct rpc_procinfo rpcb_procedures2[]; | 120 | static struct rpc_procinfo rpcb_procedures2[]; |
138 | static struct rpc_procinfo rpcb_procedures3[]; | 121 | static struct rpc_procinfo rpcb_procedures3[]; |
139 | 122 | ||
140 | static struct rpcb_info { | 123 | struct rpcb_info { |
141 | int rpc_vers; | 124 | int rpc_vers; |
142 | struct rpc_procinfo * rpc_proc; | 125 | struct rpc_procinfo * rpc_proc; |
143 | } rpcb_next_version[]; | 126 | }; |
127 | |||
128 | static struct rpcb_info rpcb_next_version[]; | ||
129 | static struct rpcb_info rpcb_next_version6[]; | ||
144 | 130 | ||
145 | static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) | 131 | static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) |
146 | { | 132 | { |
@@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, | |||
190 | RPC_CLNT_CREATE_INTR), | 176 | RPC_CLNT_CREATE_INTR), |
191 | }; | 177 | }; |
192 | 178 | ||
193 | ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); | 179 | switch (srvaddr->sa_family) { |
180 | case AF_INET: | ||
181 | ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); | ||
182 | break; | ||
183 | case AF_INET6: | ||
184 | ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); | ||
185 | break; | ||
186 | default: | ||
187 | return NULL; | ||
188 | } | ||
189 | |||
194 | if (!privileged) | 190 | if (!privileged) |
195 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; | 191 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; |
196 | return rpc_create(&args); | 192 | return rpc_create(&args); |
@@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
234 | prog, vers, prot, port); | 230 | prog, vers, prot, port); |
235 | 231 | ||
236 | rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, | 232 | rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, |
237 | IPPROTO_UDP, 2, 1); | 233 | XPRT_TRANSPORT_UDP, 2, 1); |
238 | if (IS_ERR(rpcb_clnt)) | 234 | if (IS_ERR(rpcb_clnt)) |
239 | return PTR_ERR(rpcb_clnt); | 235 | return PTR_ERR(rpcb_clnt); |
240 | 236 | ||
@@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
316 | struct rpc_task *child; | 312 | struct rpc_task *child; |
317 | struct sockaddr addr; | 313 | struct sockaddr addr; |
318 | int status; | 314 | int status; |
315 | struct rpcb_info *info; | ||
319 | 316 | ||
320 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", | 317 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", |
321 | task->tk_pid, __FUNCTION__, | 318 | task->tk_pid, __FUNCTION__, |
@@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
325 | BUG_ON(clnt->cl_parent != clnt); | 322 | BUG_ON(clnt->cl_parent != clnt); |
326 | 323 | ||
327 | if (xprt_test_and_set_binding(xprt)) { | 324 | if (xprt_test_and_set_binding(xprt)) { |
328 | status = -EACCES; /* tell caller to check again */ | 325 | status = -EAGAIN; /* tell caller to check again */ |
329 | dprintk("RPC: %5u %s: waiting for another binder\n", | 326 | dprintk("RPC: %5u %s: waiting for another binder\n", |
330 | task->tk_pid, __FUNCTION__); | 327 | task->tk_pid, __FUNCTION__); |
331 | goto bailout_nowake; | 328 | goto bailout_nowake; |
@@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task) | |||
343 | goto bailout_nofree; | 340 | goto bailout_nofree; |
344 | } | 341 | } |
345 | 342 | ||
346 | if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { | 343 | rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); |
344 | |||
345 | /* Don't ever use rpcbind v2 for AF_INET6 requests */ | ||
346 | switch (addr.sa_family) { | ||
347 | case AF_INET: | ||
348 | info = rpcb_next_version; | ||
349 | break; | ||
350 | case AF_INET6: | ||
351 | info = rpcb_next_version6; | ||
352 | break; | ||
353 | default: | ||
354 | status = -EAFNOSUPPORT; | ||
355 | dprintk("RPC: %5u %s: bad address family\n", | ||
356 | task->tk_pid, __FUNCTION__); | ||
357 | goto bailout_nofree; | ||
358 | } | ||
359 | if (info[xprt->bind_index].rpc_proc == NULL) { | ||
347 | xprt->bind_index = 0; | 360 | xprt->bind_index = 0; |
348 | status = -EACCES; /* tell caller to try again later */ | 361 | status = -EPFNOSUPPORT; |
349 | dprintk("RPC: %5u %s: no more getport versions available\n", | 362 | dprintk("RPC: %5u %s: no more getport versions available\n", |
350 | task->tk_pid, __FUNCTION__); | 363 | task->tk_pid, __FUNCTION__); |
351 | goto bailout_nofree; | 364 | goto bailout_nofree; |
352 | } | 365 | } |
353 | bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; | 366 | bind_version = info[xprt->bind_index].rpc_vers; |
354 | 367 | ||
355 | dprintk("RPC: %5u %s: trying rpcbind version %u\n", | 368 | dprintk("RPC: %5u %s: trying rpcbind version %u\n", |
356 | task->tk_pid, __FUNCTION__, bind_version); | 369 | task->tk_pid, __FUNCTION__, bind_version); |
357 | 370 | ||
371 | rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, | ||
372 | bind_version, 0); | ||
373 | if (IS_ERR(rpcb_clnt)) { | ||
374 | status = PTR_ERR(rpcb_clnt); | ||
375 | dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", | ||
376 | task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); | ||
377 | goto bailout_nofree; | ||
378 | } | ||
379 | |||
358 | map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); | 380 | map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); |
359 | if (!map) { | 381 | if (!map) { |
360 | status = -ENOMEM; | 382 | status = -ENOMEM; |
@@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task) | |||
367 | map->r_prot = xprt->prot; | 389 | map->r_prot = xprt->prot; |
368 | map->r_port = 0; | 390 | map->r_port = 0; |
369 | map->r_xprt = xprt_get(xprt); | 391 | map->r_xprt = xprt_get(xprt); |
370 | map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : | 392 | map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); |
371 | RPCB_NETID_UDP; | 393 | memcpy(map->r_addr, |
372 | memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), | 394 | rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), |
373 | sizeof(map->r_addr)); | 395 | sizeof(map->r_addr)); |
374 | map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ | 396 | map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ |
375 | 397 | ||
376 | rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); | ||
377 | rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); | ||
378 | if (IS_ERR(rpcb_clnt)) { | ||
379 | status = PTR_ERR(rpcb_clnt); | ||
380 | dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", | ||
381 | task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); | ||
382 | goto bailout; | ||
383 | } | ||
384 | |||
385 | child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); | 398 | child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); |
386 | rpc_release_client(rpcb_clnt); | 399 | rpc_release_client(rpcb_clnt); |
387 | if (IS_ERR(child)) { | 400 | if (IS_ERR(child)) { |
388 | status = -EIO; | 401 | status = -EIO; |
389 | dprintk("RPC: %5u %s: rpc_run_task failed\n", | 402 | dprintk("RPC: %5u %s: rpc_run_task failed\n", |
390 | task->tk_pid, __FUNCTION__); | 403 | task->tk_pid, __FUNCTION__); |
391 | goto bailout_nofree; | 404 | goto bailout; |
392 | } | 405 | } |
393 | rpc_put_task(child); | 406 | rpc_put_task(child); |
394 | 407 | ||
@@ -403,6 +416,7 @@ bailout_nofree: | |||
403 | bailout_nowake: | 416 | bailout_nowake: |
404 | task->tk_status = status; | 417 | task->tk_status = status; |
405 | } | 418 | } |
419 | EXPORT_SYMBOL_GPL(rpcb_getport_async); | ||
406 | 420 | ||
407 | /* | 421 | /* |
408 | * Rpcbind child task calls this callback via tk_exit. | 422 | * Rpcbind child task calls this callback via tk_exit. |
@@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) | |||
413 | struct rpc_xprt *xprt = map->r_xprt; | 427 | struct rpc_xprt *xprt = map->r_xprt; |
414 | int status = child->tk_status; | 428 | int status = child->tk_status; |
415 | 429 | ||
430 | /* Garbage reply: retry with a lesser rpcbind version */ | ||
431 | if (status == -EIO) | ||
432 | status = -EPROTONOSUPPORT; | ||
433 | |||
416 | /* rpcbind server doesn't support this rpcbind protocol version */ | 434 | /* rpcbind server doesn't support this rpcbind protocol version */ |
417 | if (status == -EPROTONOSUPPORT) | 435 | if (status == -EPROTONOSUPPORT) |
418 | xprt->bind_index++; | 436 | xprt->bind_index++; |
@@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, | |||
490 | unsigned short *portp) | 508 | unsigned short *portp) |
491 | { | 509 | { |
492 | char *addr; | 510 | char *addr; |
493 | int addr_len, c, i, f, first, val; | 511 | u32 addr_len; |
512 | int c, i, f, first, val; | ||
494 | 513 | ||
495 | *portp = 0; | 514 | *portp = 0; |
496 | addr_len = (unsigned int) ntohl(*p++); | 515 | addr_len = ntohl(*p++); |
497 | if (addr_len > RPCB_MAXADDRLEN) /* sanity */ | 516 | |
498 | return -EINVAL; | 517 | /* |
499 | 518 | * Simple sanity check. The smallest possible universal | |
500 | dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", | 519 | * address is an IPv4 address string containing 11 bytes. |
501 | (char *) p); | 520 | */ |
502 | 521 | if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) | |
522 | goto out_err; | ||
523 | |||
524 | /* | ||
525 | * Start at the end and walk backwards until the first dot | ||
526 | * is encountered. When the second dot is found, we have | ||
527 | * both parts of the port number. | ||
528 | */ | ||
503 | addr = (char *)p; | 529 | addr = (char *)p; |
504 | val = 0; | 530 | val = 0; |
505 | first = 1; | 531 | first = 1; |
@@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, | |||
521 | } | 547 | } |
522 | } | 548 | } |
523 | 549 | ||
550 | /* | ||
551 | * Simple sanity check. If we never saw a dot in the reply, | ||
552 | * then this was probably just garbage. | ||
553 | */ | ||
554 | if (first) | ||
555 | goto out_err; | ||
556 | |||
524 | dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); | 557 | dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); |
525 | return 0; | 558 | return 0; |
559 | |||
560 | out_err: | ||
561 | dprintk("RPC: rpcbind server returned malformed reply\n"); | ||
562 | return -EIO; | ||
526 | } | 563 | } |
527 | 564 | ||
528 | #define RPCB_program_sz (1u) | 565 | #define RPCB_program_sz (1u) |
@@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, | |||
531 | #define RPCB_port_sz (1u) | 568 | #define RPCB_port_sz (1u) |
532 | #define RPCB_boolean_sz (1u) | 569 | #define RPCB_boolean_sz (1u) |
533 | 570 | ||
534 | #define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) | 571 | #define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) |
535 | #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) | 572 | #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) |
536 | #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) | 573 | #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) |
537 | 574 | ||
@@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = { | |||
593 | { 0, NULL }, | 630 | { 0, NULL }, |
594 | }; | 631 | }; |
595 | 632 | ||
633 | static struct rpcb_info rpcb_next_version6[] = { | ||
634 | #ifdef CONFIG_SUNRPC_BIND34 | ||
635 | { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, | ||
636 | { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, | ||
637 | #endif | ||
638 | { 0, NULL }, | ||
639 | }; | ||
640 | |||
596 | static struct rpc_version rpcb_version2 = { | 641 | static struct rpc_version rpcb_version2 = { |
597 | .number = 2, | 642 | .number = 2, |
598 | .nrprocs = RPCB_HIGHPROC_2, | 643 | .nrprocs = RPCB_HIGHPROC_2, |
@@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = { | |||
621 | 666 | ||
622 | static struct rpc_stat rpcb_stats; | 667 | static struct rpc_stat rpcb_stats; |
623 | 668 | ||
624 | struct rpc_program rpcb_program = { | 669 | static struct rpc_program rpcb_program = { |
625 | .name = "rpcbind", | 670 | .name = "rpcbind", |
626 | .number = RPCBIND_PROGRAM, | 671 | .number = RPCBIND_PROGRAM, |
627 | .nrvers = ARRAY_SIZE(rpcb_version), | 672 | .nrvers = ARRAY_SIZE(rpcb_version), |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 954d7ec86c7e..3c773c53e12e 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size) | |||
777 | task->tk_pid, size, buf); | 777 | task->tk_pid, size, buf); |
778 | return &buf->data; | 778 | return &buf->data; |
779 | } | 779 | } |
780 | EXPORT_SYMBOL_GPL(rpc_malloc); | ||
780 | 781 | ||
781 | /** | 782 | /** |
782 | * rpc_free - free buffer allocated via rpc_malloc | 783 | * rpc_free - free buffer allocated via rpc_malloc |
@@ -802,6 +803,7 @@ void rpc_free(void *buffer) | |||
802 | else | 803 | else |
803 | kfree(buf); | 804 | kfree(buf); |
804 | } | 805 | } |
806 | EXPORT_SYMBOL_GPL(rpc_free); | ||
805 | 807 | ||
806 | /* | 808 | /* |
807 | * Creation and deletion of RPC task structures | 809 | * Creation and deletion of RPC task structures |
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 1d377d1ab7f4..97ac45f034d6 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c | |||
@@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) | |||
34 | desc->offset += len; | 34 | desc->offset += len; |
35 | return len; | 35 | return len; |
36 | } | 36 | } |
37 | EXPORT_SYMBOL_GPL(xdr_skb_read_bits); | ||
37 | 38 | ||
38 | /** | 39 | /** |
39 | * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer | 40 | * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer |
@@ -137,6 +138,7 @@ copy_tail: | |||
137 | out: | 138 | out: |
138 | return copied; | 139 | return copied; |
139 | } | 140 | } |
141 | EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); | ||
140 | 142 | ||
141 | /** | 143 | /** |
142 | * csum_partial_copy_to_xdr - checksum and copy data | 144 | * csum_partial_copy_to_xdr - checksum and copy data |
@@ -179,3 +181,4 @@ no_checksum: | |||
179 | return -1; | 181 | return -1; |
180 | return 0; | 182 | return 0; |
181 | } | 183 | } |
184 | EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); | ||
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 384c4ad5ab86..33d89e842c85 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -20,7 +20,7 @@ | |||
20 | #include <linux/sunrpc/auth.h> | 20 | #include <linux/sunrpc/auth.h> |
21 | #include <linux/workqueue.h> | 21 | #include <linux/workqueue.h> |
22 | #include <linux/sunrpc/rpc_pipe_fs.h> | 22 | #include <linux/sunrpc/rpc_pipe_fs.h> |
23 | 23 | #include <linux/sunrpc/xprtsock.h> | |
24 | 24 | ||
25 | /* RPC scheduler */ | 25 | /* RPC scheduler */ |
26 | EXPORT_SYMBOL(rpc_execute); | 26 | EXPORT_SYMBOL(rpc_execute); |
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 8142fdb8a930..31becbf09263 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <linux/unistd.h> | 19 | #include <linux/unistd.h> |
20 | #include <linux/module.h> | ||
20 | 21 | ||
21 | #include <linux/sunrpc/clnt.h> | 22 | #include <linux/sunrpc/clnt.h> |
22 | 23 | ||
@@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) | |||
40 | rt->ntimeouts[i] = 0; | 41 | rt->ntimeouts[i] = 0; |
41 | } | 42 | } |
42 | } | 43 | } |
44 | EXPORT_SYMBOL_GPL(rpc_init_rtt); | ||
43 | 45 | ||
44 | /* | 46 | /* |
45 | * NB: When computing the smoothed RTT and standard deviation, | 47 | * NB: When computing the smoothed RTT and standard deviation, |
@@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) | |||
75 | if (*sdrtt < RPC_RTO_MIN) | 77 | if (*sdrtt < RPC_RTO_MIN) |
76 | *sdrtt = RPC_RTO_MIN; | 78 | *sdrtt = RPC_RTO_MIN; |
77 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(rpc_update_rtt); | ||
78 | 81 | ||
79 | /* | 82 | /* |
80 | * Estimate rto for an nfs rpc sent via. an unreliable datagram. | 83 | * Estimate rto for an nfs rpc sent via. an unreliable datagram. |
@@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) | |||
103 | 106 | ||
104 | return res; | 107 | return res; |
105 | } | 108 | } |
109 | EXPORT_SYMBOL_GPL(rpc_calc_rto); | ||
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c8c2edccad7e..282a9a2ec90c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *); | |||
62 | static void xprt_connect_status(struct rpc_task *task); | 62 | static void xprt_connect_status(struct rpc_task *task); |
63 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | 63 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
64 | 64 | ||
65 | static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; | ||
66 | static LIST_HEAD(xprt_list); | ||
67 | |||
65 | /* | 68 | /* |
66 | * The transport code maintains an estimate on the maximum number of out- | 69 | * The transport code maintains an estimate on the maximum number of out- |
67 | * standing RPC requests, using a smoothed version of the congestion | 70 | * standing RPC requests, using a smoothed version of the congestion |
@@ -81,6 +84,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | |||
81 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | 84 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) |
82 | 85 | ||
83 | /** | 86 | /** |
87 | * xprt_register_transport - register a transport implementation | ||
88 | * @transport: transport to register | ||
89 | * | ||
90 | * If a transport implementation is loaded as a kernel module, it can | ||
91 | * call this interface to make itself known to the RPC client. | ||
92 | * | ||
93 | * Returns: | ||
94 | * 0: transport successfully registered | ||
95 | * -EEXIST: transport already registered | ||
96 | * -EINVAL: transport module being unloaded | ||
97 | */ | ||
98 | int xprt_register_transport(struct xprt_class *transport) | ||
99 | { | ||
100 | struct xprt_class *t; | ||
101 | int result; | ||
102 | |||
103 | result = -EEXIST; | ||
104 | spin_lock(&xprt_list_lock); | ||
105 | list_for_each_entry(t, &xprt_list, list) { | ||
106 | /* don't register the same transport class twice */ | ||
107 | if (t->ident == transport->ident) | ||
108 | goto out; | ||
109 | } | ||
110 | |||
111 | result = -EINVAL; | ||
112 | if (try_module_get(THIS_MODULE)) { | ||
113 | list_add_tail(&transport->list, &xprt_list); | ||
114 | printk(KERN_INFO "RPC: Registered %s transport module.\n", | ||
115 | transport->name); | ||
116 | result = 0; | ||
117 | } | ||
118 | |||
119 | out: | ||
120 | spin_unlock(&xprt_list_lock); | ||
121 | return result; | ||
122 | } | ||
123 | EXPORT_SYMBOL_GPL(xprt_register_transport); | ||
124 | |||
125 | /** | ||
126 | * xprt_unregister_transport - unregister a transport implementation | ||
127 | * transport: transport to unregister | ||
128 | * | ||
129 | * Returns: | ||
130 | * 0: transport successfully unregistered | ||
131 | * -ENOENT: transport never registered | ||
132 | */ | ||
133 | int xprt_unregister_transport(struct xprt_class *transport) | ||
134 | { | ||
135 | struct xprt_class *t; | ||
136 | int result; | ||
137 | |||
138 | result = 0; | ||
139 | spin_lock(&xprt_list_lock); | ||
140 | list_for_each_entry(t, &xprt_list, list) { | ||
141 | if (t == transport) { | ||
142 | printk(KERN_INFO | ||
143 | "RPC: Unregistered %s transport module.\n", | ||
144 | transport->name); | ||
145 | list_del_init(&transport->list); | ||
146 | module_put(THIS_MODULE); | ||
147 | goto out; | ||
148 | } | ||
149 | } | ||
150 | result = -ENOENT; | ||
151 | |||
152 | out: | ||
153 | spin_unlock(&xprt_list_lock); | ||
154 | return result; | ||
155 | } | ||
156 | EXPORT_SYMBOL_GPL(xprt_unregister_transport); | ||
157 | |||
158 | /** | ||
84 | * xprt_reserve_xprt - serialize write access to transports | 159 | * xprt_reserve_xprt - serialize write access to transports |
85 | * @task: task that is requesting access to the transport | 160 | * @task: task that is requesting access to the transport |
86 | * | 161 | * |
@@ -118,6 +193,7 @@ out_sleep: | |||
118 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 193 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
119 | return 0; | 194 | return 0; |
120 | } | 195 | } |
196 | EXPORT_SYMBOL_GPL(xprt_reserve_xprt); | ||
121 | 197 | ||
122 | static void xprt_clear_locked(struct rpc_xprt *xprt) | 198 | static void xprt_clear_locked(struct rpc_xprt *xprt) |
123 | { | 199 | { |
@@ -167,6 +243,7 @@ out_sleep: | |||
167 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 243 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
168 | return 0; | 244 | return 0; |
169 | } | 245 | } |
246 | EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); | ||
170 | 247 | ||
171 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | 248 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
172 | { | 249 | { |
@@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | |||
246 | __xprt_lock_write_next(xprt); | 323 | __xprt_lock_write_next(xprt); |
247 | } | 324 | } |
248 | } | 325 | } |
326 | EXPORT_SYMBOL_GPL(xprt_release_xprt); | ||
249 | 327 | ||
250 | /** | 328 | /** |
251 | * xprt_release_xprt_cong - allow other requests to use a transport | 329 | * xprt_release_xprt_cong - allow other requests to use a transport |
@@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) | |||
262 | __xprt_lock_write_next_cong(xprt); | 340 | __xprt_lock_write_next_cong(xprt); |
263 | } | 341 | } |
264 | } | 342 | } |
343 | EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); | ||
265 | 344 | ||
266 | static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | 345 | static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) |
267 | { | 346 | { |
@@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task) | |||
314 | { | 393 | { |
315 | __xprt_put_cong(task->tk_xprt, task->tk_rqstp); | 394 | __xprt_put_cong(task->tk_xprt, task->tk_rqstp); |
316 | } | 395 | } |
396 | EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); | ||
317 | 397 | ||
318 | /** | 398 | /** |
319 | * xprt_adjust_cwnd - adjust transport congestion window | 399 | * xprt_adjust_cwnd - adjust transport congestion window |
@@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result) | |||
345 | xprt->cwnd = cwnd; | 425 | xprt->cwnd = cwnd; |
346 | __xprt_put_cong(xprt, req); | 426 | __xprt_put_cong(xprt, req); |
347 | } | 427 | } |
428 | EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); | ||
348 | 429 | ||
349 | /** | 430 | /** |
350 | * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue | 431 | * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue |
@@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) | |||
359 | else | 440 | else |
360 | rpc_wake_up(&xprt->pending); | 441 | rpc_wake_up(&xprt->pending); |
361 | } | 442 | } |
443 | EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); | ||
362 | 444 | ||
363 | /** | 445 | /** |
364 | * xprt_wait_for_buffer_space - wait for transport output buffer to clear | 446 | * xprt_wait_for_buffer_space - wait for transport output buffer to clear |
@@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) | |||
373 | task->tk_timeout = req->rq_timeout; | 455 | task->tk_timeout = req->rq_timeout; |
374 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | 456 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); |
375 | } | 457 | } |
458 | EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); | ||
376 | 459 | ||
377 | /** | 460 | /** |
378 | * xprt_write_space - wake the task waiting for transport output buffer space | 461 | * xprt_write_space - wake the task waiting for transport output buffer space |
@@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt) | |||
393 | } | 476 | } |
394 | spin_unlock_bh(&xprt->transport_lock); | 477 | spin_unlock_bh(&xprt->transport_lock); |
395 | } | 478 | } |
479 | EXPORT_SYMBOL_GPL(xprt_write_space); | ||
396 | 480 | ||
397 | /** | 481 | /** |
398 | * xprt_set_retrans_timeout_def - set a request's retransmit timeout | 482 | * xprt_set_retrans_timeout_def - set a request's retransmit timeout |
@@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task) | |||
406 | { | 490 | { |
407 | task->tk_timeout = task->tk_rqstp->rq_timeout; | 491 | task->tk_timeout = task->tk_rqstp->rq_timeout; |
408 | } | 492 | } |
493 | EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); | ||
409 | 494 | ||
410 | /* | 495 | /* |
411 | * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout | 496 | * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout |
@@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task) | |||
425 | if (task->tk_timeout > max_timeout || task->tk_timeout == 0) | 510 | if (task->tk_timeout > max_timeout || task->tk_timeout == 0) |
426 | task->tk_timeout = max_timeout; | 511 | task->tk_timeout = max_timeout; |
427 | } | 512 | } |
513 | EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); | ||
428 | 514 | ||
429 | static void xprt_reset_majortimeo(struct rpc_rqst *req) | 515 | static void xprt_reset_majortimeo(struct rpc_rqst *req) |
430 | { | 516 | { |
@@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt) | |||
500 | xprt_wake_pending_tasks(xprt, -ENOTCONN); | 586 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
501 | spin_unlock_bh(&xprt->transport_lock); | 587 | spin_unlock_bh(&xprt->transport_lock); |
502 | } | 588 | } |
589 | EXPORT_SYMBOL_GPL(xprt_disconnect); | ||
503 | 590 | ||
504 | static void | 591 | static void |
505 | xprt_init_autodisconnect(unsigned long data) | 592 | xprt_init_autodisconnect(unsigned long data) |
@@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) | |||
610 | xprt->stat.bad_xids++; | 697 | xprt->stat.bad_xids++; |
611 | return NULL; | 698 | return NULL; |
612 | } | 699 | } |
700 | EXPORT_SYMBOL_GPL(xprt_lookup_rqst); | ||
613 | 701 | ||
614 | /** | 702 | /** |
615 | * xprt_update_rtt - update an RPC client's RTT state after receiving a reply | 703 | * xprt_update_rtt - update an RPC client's RTT state after receiving a reply |
@@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task) | |||
629 | rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); | 717 | rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); |
630 | } | 718 | } |
631 | } | 719 | } |
720 | EXPORT_SYMBOL_GPL(xprt_update_rtt); | ||
632 | 721 | ||
633 | /** | 722 | /** |
634 | * xprt_complete_rqst - called when reply processing is complete | 723 | * xprt_complete_rqst - called when reply processing is complete |
@@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) | |||
653 | req->rq_received = req->rq_private_buf.len = copied; | 742 | req->rq_received = req->rq_private_buf.len = copied; |
654 | rpc_wake_up_task(task); | 743 | rpc_wake_up_task(task); |
655 | } | 744 | } |
745 | EXPORT_SYMBOL_GPL(xprt_complete_rqst); | ||
656 | 746 | ||
657 | static void xprt_timer(struct rpc_task *task) | 747 | static void xprt_timer(struct rpc_task *task) |
658 | { | 748 | { |
@@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i | |||
889 | * @args: rpc transport creation arguments | 979 | * @args: rpc transport creation arguments |
890 | * | 980 | * |
891 | */ | 981 | */ |
892 | struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) | 982 | struct rpc_xprt *xprt_create_transport(struct xprt_create *args) |
893 | { | 983 | { |
894 | struct rpc_xprt *xprt; | 984 | struct rpc_xprt *xprt; |
895 | struct rpc_rqst *req; | 985 | struct rpc_rqst *req; |
986 | struct xprt_class *t; | ||
896 | 987 | ||
897 | switch (args->proto) { | 988 | spin_lock(&xprt_list_lock); |
898 | case IPPROTO_UDP: | 989 | list_for_each_entry(t, &xprt_list, list) { |
899 | xprt = xs_setup_udp(args); | 990 | if (t->ident == args->ident) { |
900 | break; | 991 | spin_unlock(&xprt_list_lock); |
901 | case IPPROTO_TCP: | 992 | goto found; |
902 | xprt = xs_setup_tcp(args); | 993 | } |
903 | break; | ||
904 | default: | ||
905 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", | ||
906 | args->proto); | ||
907 | return ERR_PTR(-EIO); | ||
908 | } | 994 | } |
995 | spin_unlock(&xprt_list_lock); | ||
996 | printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); | ||
997 | return ERR_PTR(-EIO); | ||
998 | |||
999 | found: | ||
1000 | xprt = t->setup(args); | ||
909 | if (IS_ERR(xprt)) { | 1001 | if (IS_ERR(xprt)) { |
910 | dprintk("RPC: xprt_create_transport: failed, %ld\n", | 1002 | dprintk("RPC: xprt_create_transport: failed, %ld\n", |
911 | -PTR_ERR(xprt)); | 1003 | -PTR_ERR(xprt)); |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile new file mode 100644 index 000000000000..264f0feeb513 --- /dev/null +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o | ||
2 | |||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c new file mode 100644 index 000000000000..12db63580427 --- /dev/null +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -0,0 +1,868 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | /* | ||
41 | * rpc_rdma.c | ||
42 | * | ||
43 | * This file contains the guts of the RPC RDMA protocol, and | ||
44 | * does marshaling/unmarshaling, etc. It is also where interfacing | ||
45 | * to the Linux RPC framework lives. | ||
46 | */ | ||
47 | |||
48 | #include "xprt_rdma.h" | ||
49 | |||
50 | #include <linux/highmem.h> | ||
51 | |||
52 | #ifdef RPC_DEBUG | ||
53 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
54 | #endif | ||
55 | |||
56 | enum rpcrdma_chunktype { | ||
57 | rpcrdma_noch = 0, | ||
58 | rpcrdma_readch, | ||
59 | rpcrdma_areadch, | ||
60 | rpcrdma_writech, | ||
61 | rpcrdma_replych | ||
62 | }; | ||
63 | |||
64 | #ifdef RPC_DEBUG | ||
65 | static const char transfertypes[][12] = { | ||
66 | "pure inline", /* no chunks */ | ||
67 | " read chunk", /* some argument via rdma read */ | ||
68 | "*read chunk", /* entire request via rdma read */ | ||
69 | "write chunk", /* some result via rdma write */ | ||
70 | "reply chunk" /* entire reply via rdma write */ | ||
71 | }; | ||
72 | #endif | ||
73 | |||
74 | /* | ||
75 | * Chunk assembly from upper layer xdr_buf. | ||
76 | * | ||
77 | * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk | ||
78 | * elements. Segments are then coalesced when registered, if possible | ||
79 | * within the selected memreg mode. | ||
80 | * | ||
81 | * Note, this routine is never called if the connection's memory | ||
82 | * registration strategy is 0 (bounce buffers). | ||
83 | */ | ||
84 | |||
85 | static int | ||
86 | rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, | ||
87 | enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) | ||
88 | { | ||
89 | int len, n = 0, p; | ||
90 | |||
91 | if (pos == 0 && xdrbuf->head[0].iov_len) { | ||
92 | seg[n].mr_page = NULL; | ||
93 | seg[n].mr_offset = xdrbuf->head[0].iov_base; | ||
94 | seg[n].mr_len = xdrbuf->head[0].iov_len; | ||
95 | pos += xdrbuf->head[0].iov_len; | ||
96 | ++n; | ||
97 | } | ||
98 | |||
99 | if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { | ||
100 | if (n == nsegs) | ||
101 | return 0; | ||
102 | seg[n].mr_page = xdrbuf->pages[0]; | ||
103 | seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; | ||
104 | seg[n].mr_len = min_t(u32, | ||
105 | PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); | ||
106 | len = xdrbuf->page_len - seg[n].mr_len; | ||
107 | pos += len; | ||
108 | ++n; | ||
109 | p = 1; | ||
110 | while (len > 0) { | ||
111 | if (n == nsegs) | ||
112 | return 0; | ||
113 | seg[n].mr_page = xdrbuf->pages[p]; | ||
114 | seg[n].mr_offset = NULL; | ||
115 | seg[n].mr_len = min_t(u32, PAGE_SIZE, len); | ||
116 | len -= seg[n].mr_len; | ||
117 | ++n; | ||
118 | ++p; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { | ||
123 | if (n == nsegs) | ||
124 | return 0; | ||
125 | seg[n].mr_page = NULL; | ||
126 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; | ||
127 | seg[n].mr_len = xdrbuf->tail[0].iov_len; | ||
128 | pos += xdrbuf->tail[0].iov_len; | ||
129 | ++n; | ||
130 | } | ||
131 | |||
132 | if (pos < xdrbuf->len) | ||
133 | dprintk("RPC: %s: marshaled only %d of %d\n", | ||
134 | __func__, pos, xdrbuf->len); | ||
135 | |||
136 | return n; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Create read/write chunk lists, and reply chunks, for RDMA | ||
141 | * | ||
142 | * Assume check against THRESHOLD has been done, and chunks are required. | ||
143 | * Assume only encoding one list entry for read|write chunks. The NFSv3 | ||
144 | * protocol is simple enough to allow this as it only has a single "bulk | ||
145 | * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The | ||
146 | * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) | ||
147 | * | ||
148 | * When used for a single reply chunk (which is a special write | ||
149 | * chunk used for the entire reply, rather than just the data), it | ||
150 | * is used primarily for READDIR and READLINK which would otherwise | ||
151 | * be severely size-limited by a small rdma inline read max. The server | ||
152 | * response will come back as an RDMA Write, followed by a message | ||
153 | * of type RDMA_NOMSG carrying the xid and length. As a result, reply | ||
154 | * chunks do not provide data alignment, however they do not require | ||
155 | * "fixup" (moving the response to the upper layer buffer) either. | ||
156 | * | ||
157 | * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): | ||
158 | * | ||
159 | * Read chunklist (a linked list): | ||
160 | * N elements, position P (same P for all chunks of same arg!): | ||
161 | * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 | ||
162 | * | ||
163 | * Write chunklist (a list of (one) counted array): | ||
164 | * N elements: | ||
165 | * 1 - N - HLOO - HLOO - ... - HLOO - 0 | ||
166 | * | ||
167 | * Reply chunk (a counted array): | ||
168 | * N elements: | ||
169 | * 1 - N - HLOO - HLOO - ... - HLOO | ||
170 | */ | ||
171 | |||
172 | static unsigned int | ||
173 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | ||
174 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) | ||
175 | { | ||
176 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
177 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); | ||
178 | int nsegs, nchunks = 0; | ||
179 | int pos; | ||
180 | struct rpcrdma_mr_seg *seg = req->rl_segments; | ||
181 | struct rpcrdma_read_chunk *cur_rchunk = NULL; | ||
182 | struct rpcrdma_write_array *warray = NULL; | ||
183 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | ||
184 | u32 *iptr = headerp->rm_body.rm_chunks; | ||
185 | |||
186 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | ||
187 | /* a read chunk - server will RDMA Read our memory */ | ||
188 | cur_rchunk = (struct rpcrdma_read_chunk *) iptr; | ||
189 | } else { | ||
190 | /* a write or reply chunk - server will RDMA Write our memory */ | ||
191 | *iptr++ = xdr_zero; /* encode a NULL read chunk list */ | ||
192 | if (type == rpcrdma_replych) | ||
193 | *iptr++ = xdr_zero; /* a NULL write chunk list */ | ||
194 | warray = (struct rpcrdma_write_array *) iptr; | ||
195 | cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1); | ||
196 | } | ||
197 | |||
198 | if (type == rpcrdma_replych || type == rpcrdma_areadch) | ||
199 | pos = 0; | ||
200 | else | ||
201 | pos = target->head[0].iov_len; | ||
202 | |||
203 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); | ||
204 | if (nsegs == 0) | ||
205 | return 0; | ||
206 | |||
207 | do { | ||
208 | /* bind/register the memory, then build chunk from result. */ | ||
209 | int n = rpcrdma_register_external(seg, nsegs, | ||
210 | cur_wchunk != NULL, r_xprt); | ||
211 | if (n <= 0) | ||
212 | goto out; | ||
213 | if (cur_rchunk) { /* read */ | ||
214 | cur_rchunk->rc_discrim = xdr_one; | ||
215 | /* all read chunks have the same "position" */ | ||
216 | cur_rchunk->rc_position = htonl(pos); | ||
217 | cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); | ||
218 | cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); | ||
219 | xdr_encode_hyper( | ||
220 | (u32 *)&cur_rchunk->rc_target.rs_offset, | ||
221 | seg->mr_base); | ||
222 | dprintk("RPC: %s: read chunk " | ||
223 | "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, | ||
224 | seg->mr_len, seg->mr_base, seg->mr_rkey, pos, | ||
225 | n < nsegs ? "more" : "last"); | ||
226 | cur_rchunk++; | ||
227 | r_xprt->rx_stats.read_chunk_count++; | ||
228 | } else { /* write/reply */ | ||
229 | cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); | ||
230 | cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); | ||
231 | xdr_encode_hyper( | ||
232 | (u32 *)&cur_wchunk->wc_target.rs_offset, | ||
233 | seg->mr_base); | ||
234 | dprintk("RPC: %s: %s chunk " | ||
235 | "elem %d@0x%llx:0x%x (%s)\n", __func__, | ||
236 | (type == rpcrdma_replych) ? "reply" : "write", | ||
237 | seg->mr_len, seg->mr_base, seg->mr_rkey, | ||
238 | n < nsegs ? "more" : "last"); | ||
239 | cur_wchunk++; | ||
240 | if (type == rpcrdma_replych) | ||
241 | r_xprt->rx_stats.reply_chunk_count++; | ||
242 | else | ||
243 | r_xprt->rx_stats.write_chunk_count++; | ||
244 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | ||
245 | } | ||
246 | nchunks++; | ||
247 | seg += n; | ||
248 | nsegs -= n; | ||
249 | } while (nsegs); | ||
250 | |||
251 | /* success. all failures return above */ | ||
252 | req->rl_nchunks = nchunks; | ||
253 | |||
254 | BUG_ON(nchunks == 0); | ||
255 | |||
256 | /* | ||
257 | * finish off header. If write, marshal discrim and nchunks. | ||
258 | */ | ||
259 | if (cur_rchunk) { | ||
260 | iptr = (u32 *) cur_rchunk; | ||
261 | *iptr++ = xdr_zero; /* finish the read chunk list */ | ||
262 | *iptr++ = xdr_zero; /* encode a NULL write chunk list */ | ||
263 | *iptr++ = xdr_zero; /* encode a NULL reply chunk */ | ||
264 | } else { | ||
265 | warray->wc_discrim = xdr_one; | ||
266 | warray->wc_nchunks = htonl(nchunks); | ||
267 | iptr = (u32 *) cur_wchunk; | ||
268 | if (type == rpcrdma_writech) { | ||
269 | *iptr++ = xdr_zero; /* finish the write chunk list */ | ||
270 | *iptr++ = xdr_zero; /* encode a NULL reply chunk */ | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Return header size. | ||
276 | */ | ||
277 | return (unsigned char *)iptr - (unsigned char *)headerp; | ||
278 | |||
279 | out: | ||
280 | for (pos = 0; nchunks--;) | ||
281 | pos += rpcrdma_deregister_external( | ||
282 | &req->rl_segments[pos], r_xprt, NULL); | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * Copy write data inline. | ||
288 | * This function is used for "small" requests. Data which is passed | ||
289 | * to RPC via iovecs (or page list) is copied directly into the | ||
290 | * pre-registered memory buffer for this request. For small amounts | ||
291 | * of data, this is efficient. The cutoff value is tunable. | ||
292 | */ | ||
293 | static int | ||
294 | rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) | ||
295 | { | ||
296 | int i, npages, curlen; | ||
297 | int copy_len; | ||
298 | unsigned char *srcp, *destp; | ||
299 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | ||
300 | |||
301 | destp = rqst->rq_svec[0].iov_base; | ||
302 | curlen = rqst->rq_svec[0].iov_len; | ||
303 | destp += curlen; | ||
304 | /* | ||
305 | * Do optional padding where it makes sense. Alignment of write | ||
306 | * payload can help the server, if our setting is accurate. | ||
307 | */ | ||
308 | pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/); | ||
309 | if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH) | ||
310 | pad = 0; /* don't pad this request */ | ||
311 | |||
312 | dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n", | ||
313 | __func__, pad, destp, rqst->rq_slen, curlen); | ||
314 | |||
315 | copy_len = rqst->rq_snd_buf.page_len; | ||
316 | r_xprt->rx_stats.pullup_copy_count += copy_len; | ||
317 | npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; | ||
318 | for (i = 0; copy_len && i < npages; i++) { | ||
319 | if (i == 0) | ||
320 | curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; | ||
321 | else | ||
322 | curlen = PAGE_SIZE; | ||
323 | if (curlen > copy_len) | ||
324 | curlen = copy_len; | ||
325 | dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", | ||
326 | __func__, i, destp, copy_len, curlen); | ||
327 | srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], | ||
328 | KM_SKB_SUNRPC_DATA); | ||
329 | if (i == 0) | ||
330 | memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); | ||
331 | else | ||
332 | memcpy(destp, srcp, curlen); | ||
333 | kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); | ||
334 | rqst->rq_svec[0].iov_len += curlen; | ||
335 | destp += curlen; | ||
336 | copy_len -= curlen; | ||
337 | } | ||
338 | if (rqst->rq_snd_buf.tail[0].iov_len) { | ||
339 | curlen = rqst->rq_snd_buf.tail[0].iov_len; | ||
340 | if (destp != rqst->rq_snd_buf.tail[0].iov_base) { | ||
341 | memcpy(destp, | ||
342 | rqst->rq_snd_buf.tail[0].iov_base, curlen); | ||
343 | r_xprt->rx_stats.pullup_copy_count += curlen; | ||
344 | } | ||
345 | dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", | ||
346 | __func__, destp, copy_len, curlen); | ||
347 | rqst->rq_svec[0].iov_len += curlen; | ||
348 | } | ||
349 | /* header now contains entire send message */ | ||
350 | return pad; | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Marshal a request: the primary job of this routine is to choose | ||
355 | * the transfer modes. See comments below. | ||
356 | * | ||
357 | * Uses multiple RDMA IOVs for a request: | ||
358 | * [0] -- RPC RDMA header, which uses memory from the *start* of the | ||
359 | * preregistered buffer that already holds the RPC data in | ||
360 | * its middle. | ||
361 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. | ||
362 | * [2] -- optional padding. | ||
363 | * [3] -- if padded, header only in [1] and data here. | ||
364 | */ | ||
365 | |||
366 | int | ||
367 | rpcrdma_marshal_req(struct rpc_rqst *rqst) | ||
368 | { | ||
369 | struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; | ||
370 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
371 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
372 | char *base; | ||
373 | size_t hdrlen, rpclen, padlen; | ||
374 | enum rpcrdma_chunktype rtype, wtype; | ||
375 | struct rpcrdma_msg *headerp; | ||
376 | |||
377 | /* | ||
378 | * rpclen gets amount of data in first buffer, which is the | ||
379 | * pre-registered buffer. | ||
380 | */ | ||
381 | base = rqst->rq_svec[0].iov_base; | ||
382 | rpclen = rqst->rq_svec[0].iov_len; | ||
383 | |||
384 | /* build RDMA header in private area at front */ | ||
385 | headerp = (struct rpcrdma_msg *) req->rl_base; | ||
386 | /* don't htonl XID, it's already done in request */ | ||
387 | headerp->rm_xid = rqst->rq_xid; | ||
388 | headerp->rm_vers = xdr_one; | ||
389 | headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); | ||
390 | headerp->rm_type = __constant_htonl(RDMA_MSG); | ||
391 | |||
392 | /* | ||
393 | * Chunks needed for results? | ||
394 | * | ||
395 | * o If the expected result is under the inline threshold, all ops | ||
396 | * return as inline (but see later). | ||
397 | * o Large non-read ops return as a single reply chunk. | ||
398 | * o Large read ops return data as write chunk(s), header as inline. | ||
399 | * | ||
400 | * Note: the NFS code sending down multiple result segments implies | ||
401 | * the op is one of read, readdir[plus], readlink or NFSv4 getacl. | ||
402 | */ | ||
403 | |||
404 | /* | ||
405 | * This code can handle read chunks, write chunks OR reply | ||
406 | * chunks -- only one type. If the request is too big to fit | ||
407 | * inline, then we will choose read chunks. If the request is | ||
408 | * a READ, then use write chunks to separate the file data | ||
409 | * into pages; otherwise use reply chunks. | ||
410 | */ | ||
411 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) | ||
412 | wtype = rpcrdma_noch; | ||
413 | else if (rqst->rq_rcv_buf.page_len == 0) | ||
414 | wtype = rpcrdma_replych; | ||
415 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | ||
416 | wtype = rpcrdma_writech; | ||
417 | else | ||
418 | wtype = rpcrdma_replych; | ||
419 | |||
420 | /* | ||
421 | * Chunks needed for arguments? | ||
422 | * | ||
423 | * o If the total request is under the inline threshold, all ops | ||
424 | * are sent as inline. | ||
425 | * o Large non-write ops are sent with the entire message as a | ||
426 | * single read chunk (protocol 0-position special case). | ||
427 | * o Large write ops transmit data as read chunk(s), header as | ||
428 | * inline. | ||
429 | * | ||
430 | * Note: the NFS code sending down multiple argument segments | ||
431 | * implies the op is a write. | ||
432 | * TBD check NFSv4 setacl | ||
433 | */ | ||
434 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | ||
435 | rtype = rpcrdma_noch; | ||
436 | else if (rqst->rq_snd_buf.page_len == 0) | ||
437 | rtype = rpcrdma_areadch; | ||
438 | else | ||
439 | rtype = rpcrdma_readch; | ||
440 | |||
441 | /* The following simplification is not true forever */ | ||
442 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) | ||
443 | wtype = rpcrdma_noch; | ||
444 | BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); | ||
445 | |||
446 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && | ||
447 | (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { | ||
448 | /* forced to "pure inline"? */ | ||
449 | dprintk("RPC: %s: too much data (%d/%d) for inline\n", | ||
450 | __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); | ||
451 | return -1; | ||
452 | } | ||
453 | |||
454 | hdrlen = 28; /*sizeof *headerp;*/ | ||
455 | padlen = 0; | ||
456 | |||
457 | /* | ||
458 | * Pull up any extra send data into the preregistered buffer. | ||
459 | * When padding is in use and applies to the transfer, insert | ||
460 | * it and change the message type. | ||
461 | */ | ||
462 | if (rtype == rpcrdma_noch) { | ||
463 | |||
464 | padlen = rpcrdma_inline_pullup(rqst, | ||
465 | RPCRDMA_INLINE_PAD_VALUE(rqst)); | ||
466 | |||
467 | if (padlen) { | ||
468 | headerp->rm_type = __constant_htonl(RDMA_MSGP); | ||
469 | headerp->rm_body.rm_padded.rm_align = | ||
470 | htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); | ||
471 | headerp->rm_body.rm_padded.rm_thresh = | ||
472 | __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); | ||
473 | headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; | ||
474 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | ||
475 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | ||
476 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | ||
477 | BUG_ON(wtype != rpcrdma_noch); | ||
478 | |||
479 | } else { | ||
480 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; | ||
481 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; | ||
482 | headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; | ||
483 | /* new length after pullup */ | ||
484 | rpclen = rqst->rq_svec[0].iov_len; | ||
485 | /* | ||
486 | * Currently we try to not actually use read inline. | ||
487 | * Reply chunks have the desirable property that | ||
488 | * they land, packed, directly in the target buffers | ||
489 | * without headers, so they require no fixup. The | ||
490 | * additional RDMA Write op sends the same amount | ||
491 | * of data, streams on-the-wire and adds no overhead | ||
492 | * on receive. Therefore, we request a reply chunk | ||
493 | * for non-writes wherever feasible and efficient. | ||
494 | */ | ||
495 | if (wtype == rpcrdma_noch && | ||
496 | r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) | ||
497 | wtype = rpcrdma_replych; | ||
498 | } | ||
499 | } | ||
500 | |||
501 | /* | ||
502 | * Marshal chunks. This routine will return the header length | ||
503 | * consumed by marshaling. | ||
504 | */ | ||
505 | if (rtype != rpcrdma_noch) { | ||
506 | hdrlen = rpcrdma_create_chunks(rqst, | ||
507 | &rqst->rq_snd_buf, headerp, rtype); | ||
508 | wtype = rtype; /* simplify dprintk */ | ||
509 | |||
510 | } else if (wtype != rpcrdma_noch) { | ||
511 | hdrlen = rpcrdma_create_chunks(rqst, | ||
512 | &rqst->rq_rcv_buf, headerp, wtype); | ||
513 | } | ||
514 | |||
515 | if (hdrlen == 0) | ||
516 | return -1; | ||
517 | |||
518 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" | ||
519 | " headerp 0x%p base 0x%p lkey 0x%x\n", | ||
520 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, | ||
521 | headerp, base, req->rl_iov.lkey); | ||
522 | |||
523 | /* | ||
524 | * initialize send_iov's - normally only two: rdma chunk header and | ||
525 | * single preregistered RPC header buffer, but if padding is present, | ||
526 | * then use a preregistered (and zeroed) pad buffer between the RPC | ||
527 | * header and any write data. In all non-rdma cases, any following | ||
528 | * data has been copied into the RPC header buffer. | ||
529 | */ | ||
530 | req->rl_send_iov[0].addr = req->rl_iov.addr; | ||
531 | req->rl_send_iov[0].length = hdrlen; | ||
532 | req->rl_send_iov[0].lkey = req->rl_iov.lkey; | ||
533 | |||
534 | req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); | ||
535 | req->rl_send_iov[1].length = rpclen; | ||
536 | req->rl_send_iov[1].lkey = req->rl_iov.lkey; | ||
537 | |||
538 | req->rl_niovs = 2; | ||
539 | |||
540 | if (padlen) { | ||
541 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; | ||
542 | |||
543 | req->rl_send_iov[2].addr = ep->rep_pad.addr; | ||
544 | req->rl_send_iov[2].length = padlen; | ||
545 | req->rl_send_iov[2].lkey = ep->rep_pad.lkey; | ||
546 | |||
547 | req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; | ||
548 | req->rl_send_iov[3].length = rqst->rq_slen - rpclen; | ||
549 | req->rl_send_iov[3].lkey = req->rl_iov.lkey; | ||
550 | |||
551 | req->rl_niovs = 4; | ||
552 | } | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | /* | ||
558 | * Chase down a received write or reply chunklist to get length | ||
559 | * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) | ||
560 | */ | ||
561 | static int | ||
562 | rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp) | ||
563 | { | ||
564 | unsigned int i, total_len; | ||
565 | struct rpcrdma_write_chunk *cur_wchunk; | ||
566 | |||
567 | i = ntohl(**iptrp); /* get array count */ | ||
568 | if (i > max) | ||
569 | return -1; | ||
570 | cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); | ||
571 | total_len = 0; | ||
572 | while (i--) { | ||
573 | struct rpcrdma_segment *seg = &cur_wchunk->wc_target; | ||
574 | ifdebug(FACILITY) { | ||
575 | u64 off; | ||
576 | xdr_decode_hyper((u32 *)&seg->rs_offset, &off); | ||
577 | dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", | ||
578 | __func__, | ||
579 | ntohl(seg->rs_length), | ||
580 | off, | ||
581 | ntohl(seg->rs_handle)); | ||
582 | } | ||
583 | total_len += ntohl(seg->rs_length); | ||
584 | ++cur_wchunk; | ||
585 | } | ||
586 | /* check and adjust for properly terminated write chunk */ | ||
587 | if (wrchunk) { | ||
588 | u32 *w = (u32 *) cur_wchunk; | ||
589 | if (*w++ != xdr_zero) | ||
590 | return -1; | ||
591 | cur_wchunk = (struct rpcrdma_write_chunk *) w; | ||
592 | } | ||
593 | if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) | ||
594 | return -1; | ||
595 | |||
596 | *iptrp = (u32 *) cur_wchunk; | ||
597 | return total_len; | ||
598 | } | ||
599 | |||
/*
 * Scatter inline received data back into provided iov's.
 *
 * The reply payload arrived inline at srcp (copy_len bytes, already
 * past the RPC/RDMA header). The head iovec is simply repointed at
 * the receive buffer; any remainder is copied into the page list and
 * then the tail. Bytes that fit nowhere are dropped (and logged).
 */
static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
{
	int i, npages, curlen, olen;
	char *destp;

	/* Head claims up to its own length, or everything if shorter */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len) {	/* write chunk header fixup */
		curlen = copy_len;
		rqst->rq_rcv_buf.head[0].iov_len = curlen;
	}

	dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
		__func__, srcp, copy_len, curlen);

	/* Shift pointer for first receive segment only */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	srcp += curlen;
	copy_len -= curlen;

	olen = copy_len;	/* bytes remaining for pages + tail */
	i = 0;
	rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		/* fill each receive page; page 0 may start at an offset */
		npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
			rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
		for (; i < npages; i++) {
			if (i == 0)
				curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
			else
				curlen = PAGE_SIZE;
			if (curlen > copy_len)
				curlen = copy_len;
			dprintk("RPC: %s: page %d"
				" srcp 0x%p len %d curlen %d\n",
				__func__, i, srcp, copy_len, curlen);
			/* atomic kmap: we may be in tasklet context */
			destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
						KM_SKB_SUNRPC_DATA);
			if (i == 0)
				memcpy(destp + rqst->rq_rcv_buf.page_base,
						srcp, curlen);
			else
				memcpy(destp, srcp, curlen);
			flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
			kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
			srcp += curlen;
			copy_len -= curlen;
			if (copy_len == 0)
				break;
		}
		rqst->rq_rcv_buf.page_len = olen - copy_len;
	} else
		rqst->rq_rcv_buf.page_len = 0;

	if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
		/* tail takes the rest; skip the copy when the tail
		 * already aliases the source location */
		curlen = copy_len;
		if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
			curlen = rqst->rq_rcv_buf.tail[0].iov_len;
		if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
			memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
		dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
			__func__, srcp, copy_len, curlen);
		rqst->rq_rcv_buf.tail[0].iov_len = curlen;
		copy_len -= curlen; ++i;
	} else
		rqst->rq_rcv_buf.tail[0].iov_len = 0;

	/* any leftover bytes had no destination segment */
	if (copy_len)
		dprintk("RPC: %s: %d bytes in"
			" %d extra segments (%d lost)\n",
			__func__, olen, i, copy_len);

	/* TBD avoid a warning from call_decode() */
	rqst->rq_private_buf = rqst->rq_rcv_buf;
}
678 | |||
679 | /* | ||
680 | * This function is called when an async event is posted to | ||
681 | * the connection which changes the connection state. All it | ||
682 | * does at this point is mark the connection up/down, the rpc | ||
683 | * timers do the rest. | ||
684 | */ | ||
685 | void | ||
686 | rpcrdma_conn_func(struct rpcrdma_ep *ep) | ||
687 | { | ||
688 | struct rpc_xprt *xprt = ep->rep_xprt; | ||
689 | |||
690 | spin_lock_bh(&xprt->transport_lock); | ||
691 | if (ep->rep_connected > 0) { | ||
692 | if (!xprt_test_and_set_connected(xprt)) | ||
693 | xprt_wake_pending_tasks(xprt, 0); | ||
694 | } else { | ||
695 | if (xprt_test_and_clear_connected(xprt)) | ||
696 | xprt_wake_pending_tasks(xprt, ep->rep_connected); | ||
697 | } | ||
698 | spin_unlock_bh(&xprt->transport_lock); | ||
699 | } | ||
700 | |||
/*
 * This function is called when a memory window unbind which we are
 * waiting for completes. Just use rr_func (zeroed by upcall) to
 * signal completion to the sleeper (see the MEMWINDOWS path in
 * rpcrdma_reply_handler, which installs this as rep->rr_func).
 */
static void
rpcrdma_unbind_func(struct rpcrdma_rep *rep)
{
	wake_up(&rep->rr_unbind);
}
710 | |||
/*
 * Called as a tasklet to do req/reply match and complete a request
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 *
 * Runs under xprt->transport_lock from the XID match until the
 * request is completed; the error/repost paths run unlocked.
 */
void
rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_msg *headerp;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	struct rpc_xprt *xprt = rep->rr_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	u32 *iptr;
	int i, rdmalen, status;

	/* Check status. If bad, signal disconnect and return rep to pool */
	if (rep->rr_len == ~0U) {
		rpcrdma_recv_buffer_put(rep);
		if (r_xprt->rx_ep.rep_connected == 1) {
			r_xprt->rx_ep.rep_connected = -EIO;
			rpcrdma_conn_func(&r_xprt->rx_ep);
		}
		return;
	}
	/* 28 bytes == the fixed portion of the RPC/RDMA header */
	if (rep->rr_len < 28) {
		dprintk("RPC: %s: short/invalid reply\n", __func__);
		goto repost;
	}
	headerp = (struct rpcrdma_msg *) rep->rr_base;
	if (headerp->rm_vers != xdr_one) {
		dprintk("RPC: %s: invalid version %d\n",
			__func__, ntohl(headerp->rm_vers));
		goto repost;
	}

	/* Get XID and try for a match. */
	spin_lock(&xprt->transport_lock);
	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
	if (rqst == NULL) {
		spin_unlock(&xprt->transport_lock);
		dprintk("RPC: %s: reply 0x%p failed "
			"to match any request xid 0x%08x len %d\n",
			__func__, rep, headerp->rm_xid, rep->rr_len);
	/* NB: also reached by goto from the unlocked checks above */
repost:
		r_xprt->rx_stats.bad_reply_count++;
		rep->rr_func = rpcrdma_reply_handler;
		/* recycle the receive buffer; on post failure return it */
		if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
			rpcrdma_recv_buffer_put(rep);

		return;
	}

	/* get request object */
	req = rpcr_to_rdmar(rqst);

	dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
		" RPC request 0x%p xid 0x%08x\n",
		__func__, rep, req, rqst, headerp->rm_xid);

	/* a matched request must not already own a reply */
	BUG_ON(!req || req->rl_reply);

	/* from here on, the reply is no longer an orphan */
	req->rl_reply = rep;

	/* check for expected message types */
	/* The order of some of these tests is important. */
	switch (headerp->rm_type) {
	case __constant_htonl(RDMA_MSG):
		/* never expect read chunks */
		/* never expect reply chunks (two ways to check) */
		/* never expect write chunks without having offered RDMA */
		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
		     req->rl_nchunks == 0))
			goto badheader;
		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
			/* count any expected write chunks in read reply */
			/* start at write chunk array count */
			iptr = &headerp->rm_body.rm_chunks[2];
			rdmalen = rpcrdma_count_chunks(rep,
						req->rl_nchunks, 1, &iptr);
			/* check for validity, and no reply chunk after */
			if (rdmalen < 0 || *iptr++ != xdr_zero)
				goto badheader;
			rep->rr_len -=
			    ((unsigned char *)iptr - (unsigned char *)headerp);
			/* status = inline bytes + bytes RDMA'd by server */
			status = rep->rr_len + rdmalen;
			r_xprt->rx_stats.total_rdma_reply += rdmalen;
		} else {
			/* else ordinary inline */
			iptr = (u32 *)((unsigned char *)headerp + 28);
			rep->rr_len -= 28; /*sizeof *headerp;*/
			status = rep->rr_len;
		}
		/* Fix up the rpc results for upper layer */
		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
		break;

	case __constant_htonl(RDMA_NOMSG):
		/* never expect read or write chunks, always reply chunks */
		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
		    headerp->rm_body.rm_chunks[2] != xdr_one ||
		    req->rl_nchunks == 0)
			goto badheader;
		iptr = (u32 *)((unsigned char *)headerp + 28);
		rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
		if (rdmalen < 0)
			goto badheader;
		r_xprt->rx_stats.total_rdma_reply += rdmalen;
		/* Reply chunk buffer already is the reply vector - no fixup. */
		status = rdmalen;
		break;

badheader:
	default:
		dprintk("%s: invalid rpcrdma reply header (type %d):"
			" chunks[012] == %d %d %d"
			" expected chunks <= %d\n",
			__func__, ntohl(headerp->rm_type),
			headerp->rm_body.rm_chunks[0],
			headerp->rm_body.rm_chunks[1],
			headerp->rm_body.rm_chunks[2],
			req->rl_nchunks);
		status = -EIO;
		r_xprt->rx_stats.bad_reply_count++;
		break;
	}

	/* If using mw bind, start the deregister process now. */
	/* (Note: if mr_free(), cannot perform it here, in tasklet context) */
	if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_MEMWINDOWS:
		/* unbind all but the last chunk async; the last one
		 * signals rr_unbind via rpcrdma_unbind_func */
		for (i = 0; req->rl_nchunks-- > 1;)
			i += rpcrdma_deregister_external(
				&req->rl_segments[i], r_xprt, NULL);
		/* Optionally wait (not here) for unbinds to complete */
		rep->rr_func = rpcrdma_unbind_func;
		(void) rpcrdma_deregister_external(&req->rl_segments[i],
						   r_xprt, rep);
		break;
	case RPCRDMA_MEMWINDOWS_ASYNC:
		for (i = 0; req->rl_nchunks--;)
			i += rpcrdma_deregister_external(&req->rl_segments[i],
							 r_xprt, NULL);
		break;
	default:
		break;
	}

	dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
		__func__, xprt, rqst, status);
	xprt_complete_rqst(rqst->rq_task, status);
	spin_unlock(&xprt->transport_lock);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c new file mode 100644 index 000000000000..dc55cc974c90 --- /dev/null +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -0,0 +1,800 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | /* | ||
41 | * transport.c | ||
42 | * | ||
43 | * This file contains the top-level implementation of an RPC RDMA | ||
44 | * transport. | ||
45 | * | ||
46 | * Naming convention: functions beginning with xprt_ are part of the | ||
47 | * transport switch. All others are RPC RDMA internal. | ||
48 | */ | ||
49 | |||
50 | #include <linux/module.h> | ||
51 | #include <linux/init.h> | ||
52 | #include <linux/seq_file.h> | ||
53 | |||
54 | #include "xprt_rdma.h" | ||
55 | |||
56 | #ifdef RPC_DEBUG | ||
57 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
58 | #endif | ||
59 | |||
60 | MODULE_LICENSE("Dual BSD/GPL"); | ||
61 | |||
62 | MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); | ||
63 | MODULE_AUTHOR("Network Appliance, Inc."); | ||
64 | |||
/*
 * tunables (exported through the sysctl table below when RPC_DEBUG)
 */

/* number of rpc_rqst slots per transport */
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
/* largest inline (non-chunked) read/write payloads, in bytes */
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
/* optional zero-padding after inline write data (0 by default) */
static unsigned int xprt_rdma_inline_write_padding;
/* memory registration strategy; all-physical only when persistent
 * registration is compiled in */
#if !RPCRDMA_PERSISTENT_REGISTRATION
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
#else
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
#endif
78 | |||
#ifdef RPC_DEBUG

/* bounds enforced by the sysctl handlers below */
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;

/* handle returned by register_sysctl_table(), for unregistration */
static struct ctl_table_header *sunrpc_table_header;

/* /proc/sys/sunrpc entries exposing the tunables defined above */
static ctl_table xr_tunables_table[] = {
	{
		.ctl_name	= CTL_SLOTTABLE_RDMA,
		.procname	= "rdma_slot_table_entries",
		.data		= &xprt_rdma_slot_table_entries,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_slot_table_size,
		.extra2		= &max_slot_table_size
	},
	{
		.ctl_name	= CTL_RDMA_MAXINLINEREAD,
		.procname	= "rdma_max_inline_read",
		.data		= &xprt_rdma_max_inline_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	},
	{
		.ctl_name	= CTL_RDMA_MAXINLINEWRITE,
		.procname	= "rdma_max_inline_write",
		.data		= &xprt_rdma_max_inline_write,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	},
	{
		.ctl_name	= CTL_RDMA_WRITEPADDING,
		.procname	= "rdma_inline_write_padding",
		.data		= &xprt_rdma_inline_write_padding,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
		.extra2		= &max_padding,
	},
	{
		.ctl_name	= CTL_RDMA_MEMREG,
		.procname	= "rdma_memreg_strategy",
		.data		= &xprt_rdma_memreg_strategy,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_memreg,
		.extra2		= &max_memreg,
	},
	{
		.ctl_name = 0,		/* array terminator */
	},
};

/* parent "sunrpc" directory for the table above */
static ctl_table sunrpc_table[] = {
	{
		.ctl_name	= CTL_SUNRPC,
		.procname	= "sunrpc",
		.mode		= 0555,
		.child		= xr_tunables_table
	},
	{
		.ctl_name = 0,		/* array terminator */
	},
};

#endif
160 | |||
161 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | ||
162 | |||
163 | static void | ||
164 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) | ||
165 | { | ||
166 | struct sockaddr_in *addr = (struct sockaddr_in *) | ||
167 | &rpcx_to_rdmad(xprt).addr; | ||
168 | char *buf; | ||
169 | |||
170 | buf = kzalloc(20, GFP_KERNEL); | ||
171 | if (buf) | ||
172 | snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); | ||
173 | xprt->address_strings[RPC_DISPLAY_ADDR] = buf; | ||
174 | |||
175 | buf = kzalloc(8, GFP_KERNEL); | ||
176 | if (buf) | ||
177 | snprintf(buf, 8, "%u", ntohs(addr->sin_port)); | ||
178 | xprt->address_strings[RPC_DISPLAY_PORT] = buf; | ||
179 | |||
180 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | ||
181 | |||
182 | buf = kzalloc(48, GFP_KERNEL); | ||
183 | if (buf) | ||
184 | snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", | ||
185 | NIPQUAD(addr->sin_addr.s_addr), | ||
186 | ntohs(addr->sin_port), "rdma"); | ||
187 | xprt->address_strings[RPC_DISPLAY_ALL] = buf; | ||
188 | |||
189 | buf = kzalloc(10, GFP_KERNEL); | ||
190 | if (buf) | ||
191 | snprintf(buf, 10, "%02x%02x%02x%02x", | ||
192 | NIPQUAD(addr->sin_addr.s_addr)); | ||
193 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; | ||
194 | |||
195 | buf = kzalloc(8, GFP_KERNEL); | ||
196 | if (buf) | ||
197 | snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); | ||
198 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; | ||
199 | |||
200 | buf = kzalloc(30, GFP_KERNEL); | ||
201 | if (buf) | ||
202 | snprintf(buf, 30, NIPQUAD_FMT".%u.%u", | ||
203 | NIPQUAD(addr->sin_addr.s_addr), | ||
204 | ntohs(addr->sin_port) >> 8, | ||
205 | ntohs(addr->sin_port) & 0xff); | ||
206 | xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; | ||
207 | |||
208 | /* netid */ | ||
209 | xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; | ||
210 | } | ||
211 | |||
212 | static void | ||
213 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) | ||
214 | { | ||
215 | kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); | ||
216 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); | ||
217 | kfree(xprt->address_strings[RPC_DISPLAY_ALL]); | ||
218 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); | ||
219 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); | ||
220 | kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); | ||
221 | } | ||
222 | |||
223 | static void | ||
224 | xprt_rdma_connect_worker(struct work_struct *work) | ||
225 | { | ||
226 | struct rpcrdma_xprt *r_xprt = | ||
227 | container_of(work, struct rpcrdma_xprt, rdma_connect.work); | ||
228 | struct rpc_xprt *xprt = &r_xprt->xprt; | ||
229 | int rc = 0; | ||
230 | |||
231 | if (!xprt->shutdown) { | ||
232 | xprt_clear_connected(xprt); | ||
233 | |||
234 | dprintk("RPC: %s: %sconnect\n", __func__, | ||
235 | r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); | ||
236 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); | ||
237 | if (rc) | ||
238 | goto out; | ||
239 | } | ||
240 | goto out_clear; | ||
241 | |||
242 | out: | ||
243 | xprt_wake_pending_tasks(xprt, rc); | ||
244 | |||
245 | out_clear: | ||
246 | dprintk("RPC: %s: exit\n", __func__); | ||
247 | xprt_clear_connecting(xprt); | ||
248 | } | ||
249 | |||
/*
 * xprt_rdma_destroy
 *
 * Destroy the xprt.
 * Free all memory associated with the object, including its own.
 * NOTE: none of the *destroy methods free memory for their top-level
 * objects, even though they may have allocated it (they do free
 * private memory). It's up to the caller to handle it. In this
 * case (RDMA transport), all structure memory is inlined with the
 * struct rpcrdma_xprt.
 */
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	int rc;

	dprintk("RPC: %s: called\n", __func__);

	/* stop any queued/running (re)connect work first */
	cancel_delayed_work(&r_xprt->rdma_connect);
	flush_scheduled_work();

	xprt_clear_connected(xprt);

	/* tear down in reverse order of xprt_setup_rdma() */
	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
	rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
	if (rc)
		dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
			__func__, rc);
	rpcrdma_ia_close(&r_xprt->rx_ia);

	xprt_rdma_free_addresses(xprt);

	kfree(xprt->slot);
	xprt->slot = NULL;
	kfree(xprt);

	dprintk("RPC: %s: returning\n", __func__);

	/* drop the reference taken in xprt_setup_rdma() */
	module_put(THIS_MODULE);
}
291 | |||
/**
 * xprt_setup_rdma - Set up transport to use RDMA
 *
 * @args: rpc transport arguments
 *
 * Allocates the rpcrdma_xprt (which embeds the generic rpc_xprt),
 * opens the interface adapter, creates the endpoint and the
 * pre-registered buffers. Returns the new xprt, or an ERR_PTR on
 * failure (resources are unwound via the out* labels in reverse
 * order of construction).
 */
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
	struct rpcrdma_create_data_internal cdata;
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;
	struct rpcrdma_ep *new_ep;
	struct sockaddr_in *sin;
	int rc;

	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
	if (xprt == NULL) {
		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	xprt->max_reqs = xprt_rdma_slot_table_entries;
	xprt->slot = kcalloc(xprt->max_reqs,
				sizeof(struct rpc_rqst), GFP_KERNEL);
	if (xprt->slot == NULL) {
		kfree(xprt);
		dprintk("RPC: %s: couldn't allocate %d slots\n",
			__func__, xprt->max_reqs);
		return ERR_PTR(-ENOMEM);
	}

	/* 60 second timeout, no retries */
	xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
	xprt->bind_timeout = (60U * HZ);
	xprt->connect_timeout = (60U * HZ);
	xprt->reestablish_timeout = (5U * HZ);
	xprt->idle_timeout = (5U * 60 * HZ);

	xprt->resvport = 0;		/* privileged port not needed */
	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
	xprt->ops = &xprt_rdma_procs;

	/*
	 * Set up RDMA-specific connect data.
	 */

	/* Put server RDMA address in local cdata */
	memcpy(&cdata.addr, args->dstaddr, args->addrlen);

	/* Ensure xprt->addr holds valid server TCP (not RDMA)
	 * address, for any side protocols which peek at it */
	xprt->prot = IPPROTO_TCP;
	xprt->addrlen = args->addrlen;
	memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);

	sin = (struct sockaddr_in *)&cdata.addr;
	if (ntohs(sin->sin_port) != 0)
		xprt_set_bound(xprt);

	dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__,
		NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));

	/* Set max requests */
	cdata.max_requests = xprt->max_reqs;

	/* Set some length limits */
	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */

	/* inline sizes are tunable but capped by the chunked maxima */
	cdata.inline_wsize = xprt_rdma_max_inline_write;
	if (cdata.inline_wsize > cdata.wsize)
		cdata.inline_wsize = cdata.wsize;

	cdata.inline_rsize = xprt_rdma_max_inline_read;
	if (cdata.inline_rsize > cdata.rsize)
		cdata.inline_rsize = cdata.rsize;

	cdata.padding = xprt_rdma_inline_write_padding;

	/*
	 * Create new transport instance, which includes initialized
	 *  o ia
	 *  o endpoint
	 *  o buffers
	 */

	new_xprt = rpcx_to_rdmax(xprt);

	rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
				xprt_rdma_memreg_strategy);
	if (rc)
		goto out1;

	/*
	 * initialize and create ep
	 */
	new_xprt->rx_data = cdata;
	new_ep = &new_xprt->rx_ep;
	new_ep->rep_remote_addr = cdata.addr;

	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
				&new_xprt->rx_ia, &new_xprt->rx_data);
	if (rc)
		goto out2;

	/*
	 * Allocate pre-registered send and receive buffers for headers and
	 * any inline data. Also specify any padding which will be provided
	 * from a preregistered zero buffer.
	 */
	rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
				&new_xprt->rx_data);
	if (rc)
		goto out3;

	/*
	 * Register a callback for connection events. This is necessary because
	 * connection loss notification is async. We also catch connection loss
	 * when reaping receives.
	 */
	INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
	new_ep->rep_func = rpcrdma_conn_func;
	new_ep->rep_xprt = xprt;

	xprt_rdma_format_addresses(xprt);

	/* pin the module for the lifetime of the transport; dropped
	 * again in xprt_rdma_destroy() */
	if (!try_module_get(THIS_MODULE))
		goto out4;

	return xprt;

out4:
	xprt_rdma_free_addresses(xprt);
	rc = -EINVAL;
out3:
	(void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
	rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
	kfree(xprt->slot);
	kfree(xprt);
	return ERR_PTR(rc);
}
442 | |||
443 | /* | ||
444 | * Close a connection, during shutdown or timeout/reconnect | ||
445 | */ | ||
446 | static void | ||
447 | xprt_rdma_close(struct rpc_xprt *xprt) | ||
448 | { | ||
449 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
450 | |||
451 | dprintk("RPC: %s: closing\n", __func__); | ||
452 | xprt_disconnect(xprt); | ||
453 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); | ||
454 | } | ||
455 | |||
456 | static void | ||
457 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | ||
458 | { | ||
459 | struct sockaddr_in *sap; | ||
460 | |||
461 | sap = (struct sockaddr_in *)&xprt->addr; | ||
462 | sap->sin_port = htons(port); | ||
463 | sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; | ||
464 | sap->sin_port = htons(port); | ||
465 | dprintk("RPC: %s: %u\n", __func__, port); | ||
466 | } | ||
467 | |||
468 | static void | ||
469 | xprt_rdma_connect(struct rpc_task *task) | ||
470 | { | ||
471 | struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; | ||
472 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
473 | |||
474 | if (!xprt_test_and_set_connecting(xprt)) { | ||
475 | if (r_xprt->rx_ep.rep_connected != 0) { | ||
476 | /* Reconnect */ | ||
477 | schedule_delayed_work(&r_xprt->rdma_connect, | ||
478 | xprt->reestablish_timeout); | ||
479 | } else { | ||
480 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | ||
481 | if (!RPC_IS_ASYNC(task)) | ||
482 | flush_scheduled_work(); | ||
483 | } | ||
484 | } | ||
485 | } | ||
486 | |||
487 | static int | ||
488 | xprt_rdma_reserve_xprt(struct rpc_task *task) | ||
489 | { | ||
490 | struct rpc_xprt *xprt = task->tk_xprt; | ||
491 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
492 | int credits = atomic_read(&r_xprt->rx_buf.rb_credits); | ||
493 | |||
494 | /* == RPC_CWNDSCALE @ init, but *after* setup */ | ||
495 | if (r_xprt->rx_buf.rb_cwndscale == 0UL) { | ||
496 | r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; | ||
497 | dprintk("RPC: %s: cwndscale %lu\n", __func__, | ||
498 | r_xprt->rx_buf.rb_cwndscale); | ||
499 | BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); | ||
500 | } | ||
501 | xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; | ||
502 | return xprt_reserve_xprt_cong(task); | ||
503 | } | ||
504 | |||
/*
 * The RDMA allocate/free functions need the task structure as a place
 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
 * sequence. For this reason, the recv buffers are attached to send
 * buffers for portions of the RPC. Note that the RPC layer allocates
 * both send and receive buffers in the same call. We may register
 * the receive buffer portion when using reply chunks.
 *
 * Returns a pointer to the request's XDR buffer, or NULL on failure
 * (the RPC framework will retry).
 */
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
	struct rpc_xprt *xprt = task->tk_xprt;
	struct rpcrdma_req *req, *nreq;

	/* grab a pre-registered request from the buffer pool */
	req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
	BUG_ON(NULL == req);

	if (size > req->rl_size) {
		dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
			"prog %d vers %d proc %d\n",
			__func__, size, req->rl_size,
			task->tk_client->cl_prog, task->tk_client->cl_vers,
			task->tk_msg.rpc_proc->p_proc);
		/*
		 * Outgoing length shortage. Our inline write max must have
		 * been configured to perform direct i/o.
		 *
		 * This is therefore a large metadata operation, and the
		 * allocate call was made on the maximum possible message,
		 * e.g. containing long filename(s) or symlink data. In
		 * fact, while these metadata operations *might* carry
		 * large outgoing payloads, they rarely *do*. However, we
		 * have to commit to the request here, so reallocate and
		 * register it now. The data path will never require this
		 * reallocation.
		 *
		 * If the allocation or registration fails, the RPC framework
		 * will (doggedly) retry.
		 */
		if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
				RPCRDMA_BOUNCEBUFFERS) {
			/* forced to "pure inline" */
			dprintk("RPC: %s: too much data (%zd) for inline "
				"(r/w max %d/%d)\n", __func__, size,
				rpcx_to_rdmad(xprt).inline_rsize,
				rpcx_to_rdmad(xprt).inline_wsize);
			size = req->rl_size;
			rpc_exit(task, -EIO);		/* fail the operation */
			rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
			goto out;
		}
		/* GFP_ATOMIC when allocating on behalf of swap-out */
		if (task->tk_flags & RPC_TASK_SWAPPER)
			nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
		else
			nreq = kmalloc(sizeof *req + size, GFP_NOFS);
		if (nreq == NULL)
			goto outfail;

		/* register the oversized replacement buffer "the hard way" */
		if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
				nreq->rl_base, size + sizeof(struct rpcrdma_req)
				- offsetof(struct rpcrdma_req, rl_base),
				&nreq->rl_handle, &nreq->rl_iov)) {
			kfree(nreq);
			goto outfail;
		}
		rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
		nreq->rl_size = size;
		nreq->rl_niovs = 0;
		nreq->rl_nchunks = 0;
		/* stash the pooled req so xprt_rdma_free can return it */
		nreq->rl_buffer = (struct rpcrdma_buffer *)req;
		nreq->rl_reply = req->rl_reply;
		memcpy(nreq->rl_segments,
			req->rl_segments, sizeof nreq->rl_segments);
		/* flag the swap with an unused field */
		nreq->rl_iov.length = 0;
		req->rl_reply = NULL;
		req = nreq;
	}
	dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
out:
	return req->rl_xdr_buf;

outfail:
	rpcrdma_buffer_put(req);
	rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
	return NULL;
}
592 | |||
/*
 * xprt_rdma_free - buf_free method: return all RDMA resources to the pool.
 *
 * Counterpart to xprt_rdma_allocate() above: deregisters any external
 * chunk registrations, reclaims a "hardway" oversize request if one was
 * substituted, and returns the request/reply pair to the buffer pool.
 */
static void
xprt_rdma_free(void *buffer)
{
	struct rpcrdma_req *req;
	struct rpcrdma_xprt *r_xprt;
	struct rpcrdma_rep *rep;
	int i;

	if (buffer == NULL)	/* allocate returned NULL; nothing to undo */
		return;

	/* recover the owning request from the xdr buffer we handed out */
	req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
	r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
	rep = req->rl_reply;

	dprintk("RPC: %s: called on 0x%p%s\n",
		__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");

	/*
	 * Finish the deregistration. When using mw bind, this was
	 * begun in rpcrdma_reply_handler(). In all other modes, we
	 * do it here, in thread context. The process is considered
	 * complete when the rr_func vector becomes NULL - this
	 * was put in place during rpcrdma_reply_handler() - the wait
	 * call below will not block if the dereg is "done". If
	 * interrupted, our framework will clean up.
	 */
	/* i advances by however many segments each deregistration consumed */
	for (i = 0; req->rl_nchunks;) {
		--req->rl_nchunks;
		i += rpcrdma_deregister_external(
			&req->rl_segments[i], r_xprt, NULL);
	}

	/* body runs only if the wait was interrupted by a signal */
	if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
		rep->rr_func = NULL;	/* abandon the callback */
		req->rl_reply = NULL;
	}

	if (req->rl_iov.length == 0) {	/* see allocate above */
		struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
		oreq->rl_reply = req->rl_reply;
		(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
						   req->rl_handle,
						   &req->rl_iov);
		kfree(req);
		req = oreq;	/* put back the original, pooled request */
	}

	/* Put back request+reply buffers */
	rpcrdma_buffer_put(req);
}
647 | |||
/*
 * send_request invokes the meat of RPC RDMA. It must do the following:
 *  1.  Marshal the RPC request into an RPC RDMA request, which means
 *      putting a header in front of data, and creating IOVs for RDMA
 *      from those in the request.
 *  2.  In marshaling, detect opportunities for RDMA, and use them.
 *  3.  Post a recv message to set up asynch completion, then send
 *      the request (rpcrdma_ep_post).
 *  4.  No partial sends are possible in the RPC-RDMA protocol (as in UDP).
 *
 * Returns 0 on success, -EIO if marshaling failed, or -ENOTCONN if the
 * post failed (which also forces a transport disconnect).
 */

static int
xprt_rdma_send_request(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpc_xprt *xprt = task->tk_xprt;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	/* marshal the send itself; skipped on retransmit (rl_niovs != 0) */
	if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
		r_xprt->rx_stats.failed_marshal_count++;
		dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
			__func__);
		return -EIO;
	}

	if (req->rl_reply == NULL) 		/* e.g. reconnection */
		rpcrdma_recv_buffer_get(req);

	if (req->rl_reply) {
		req->rl_reply->rr_func = rpcrdma_reply_handler;
		/* this need only be done once, but... */
		req->rl_reply->rr_xprt = xprt;
	}

	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
		xprt_disconnect(xprt);
		return -ENOTCONN;	/* implies disconnect */
	}

	rqst->rq_bytes_sent = 0;
	return 0;
}
692 | |||
/*
 * xprt_rdma_print_stats - emit transport statistics for /proc mountstats.
 *
 * NOTE: the format string and the argument list below are positionally
 * paired (generic xprt counters first, then RDMA-specific counters);
 * keep them in lockstep when adding fields.
 */
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	long idle_time = 0;

	if (xprt_connected(xprt))
		idle_time = (long)(jiffies - xprt->last_used) / HZ;

	seq_printf(seq,
	  "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
	  "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",

	   0,	/* need a local port? */
	   xprt->stat.bind_count,
	   xprt->stat.connect_count,
	   xprt->stat.connect_time,
	   idle_time,
	   xprt->stat.sends,
	   xprt->stat.recvs,
	   xprt->stat.bad_xids,
	   xprt->stat.req_u,
	   xprt->stat.bklog_u,

	   r_xprt->rx_stats.read_chunk_count,
	   r_xprt->rx_stats.write_chunk_count,
	   r_xprt->rx_stats.reply_chunk_count,
	   r_xprt->rx_stats.total_rdma_request,
	   r_xprt->rx_stats.total_rdma_reply,
	   r_xprt->rx_stats.pullup_copy_count,
	   r_xprt->rx_stats.fixup_copy_count,
	   r_xprt->rx_stats.hardway_register_count,
	   r_xprt->rx_stats.failed_marshal_count,
	   r_xprt->rx_stats.bad_reply_count);
}
727 | |||
/*
 * Plumbing for rpc transport switch and kernel module
 */

/* Method table wiring this transport into the generic sunrpc xprt layer.
 * Entries without an xprt_rdma_ prefix reuse generic sunrpc helpers. */
static struct rpc_xprt_ops xprt_rdma_procs = {
	.reserve_xprt		= xprt_rdma_reserve_xprt,
	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
	.release_request	= xprt_release_rqst_cong, /* ditto */
	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
	.rpcbind		= rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
	.set_port		= xprt_rdma_set_port,
	.connect		= xprt_rdma_connect,
	.buf_alloc		= xprt_rdma_allocate,
	.buf_free		= xprt_rdma_free,
	.send_request		= xprt_rdma_send_request,
	.close			= xprt_rdma_close,
	.destroy		= xprt_rdma_destroy,
	.print_stats		= xprt_rdma_print_stats
};
747 | |||
/* Transport class registered with the sunrpc switch; selected by
 * XPRT_TRANSPORT_RDMA at mount time, instantiated via xprt_setup_rdma(). */
static struct xprt_class xprt_rdma = {
	.list			= LIST_HEAD_INIT(xprt_rdma.list),
	.name			= "rdma",
	.owner			= THIS_MODULE,
	.ident			= XPRT_TRANSPORT_RDMA,
	.setup			= xprt_setup_rdma,
};
755 | |||
756 | static void __exit xprt_rdma_cleanup(void) | ||
757 | { | ||
758 | int rc; | ||
759 | |||
760 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | ||
761 | #ifdef RPC_DEBUG | ||
762 | if (sunrpc_table_header) { | ||
763 | unregister_sysctl_table(sunrpc_table_header); | ||
764 | sunrpc_table_header = NULL; | ||
765 | } | ||
766 | #endif | ||
767 | rc = xprt_unregister_transport(&xprt_rdma); | ||
768 | if (rc) | ||
769 | dprintk("RPC: %s: xprt_unregister returned %i\n", | ||
770 | __func__, rc); | ||
771 | } | ||
772 | |||
773 | static int __init xprt_rdma_init(void) | ||
774 | { | ||
775 | int rc; | ||
776 | |||
777 | rc = xprt_register_transport(&xprt_rdma); | ||
778 | |||
779 | if (rc) | ||
780 | return rc; | ||
781 | |||
782 | dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); | ||
783 | |||
784 | dprintk(KERN_INFO "Defaults:\n"); | ||
785 | dprintk(KERN_INFO "\tSlots %d\n" | ||
786 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", | ||
787 | xprt_rdma_slot_table_entries, | ||
788 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | ||
789 | dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", | ||
790 | xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); | ||
791 | |||
792 | #ifdef RPC_DEBUG | ||
793 | if (!sunrpc_table_header) | ||
794 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | ||
795 | #endif | ||
796 | return 0; | ||
797 | } | ||
798 | |||
/* Hook the transport into the module load/unload lifecycle. */
module_init(xprt_rdma_init);
module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c new file mode 100644 index 000000000000..9ec8ca4f6028 --- /dev/null +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -0,0 +1,1626 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | /* | ||
41 | * verbs.c | ||
42 | * | ||
43 | * Encapsulates the major functions managing: | ||
44 | * o adapters | ||
45 | * o endpoints | ||
46 | * o connections | ||
47 | * o buffer memory | ||
48 | */ | ||
49 | |||
#include <linux/kernel.h>	/* ARRAY_SIZE */
#include <linux/pci.h>	/* for Tavor hack below */

#include "xprt_rdma.h"
53 | |||
54 | /* | ||
55 | * Globals/Macros | ||
56 | */ | ||
57 | |||
58 | #ifdef RPC_DEBUG | ||
59 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
60 | #endif | ||
61 | |||
62 | /* | ||
63 | * internal functions | ||
64 | */ | ||
65 | |||
/*
 * Replies are handled in tasklet context using a single, global list:
 * completions queue a rpcrdma_rep here, and the rdma tasklet simply
 * dequeues each reply and invokes its rr_func callback in order.
 */

/* protects rpcrdma_tasklets_g; taken with IRQs disabled since it is
 * also acquired from completion (interrupt) context */
static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);
74 | |||
75 | static void | ||
76 | rpcrdma_run_tasklet(unsigned long data) | ||
77 | { | ||
78 | struct rpcrdma_rep *rep; | ||
79 | void (*func)(struct rpcrdma_rep *); | ||
80 | unsigned long flags; | ||
81 | |||
82 | data = data; | ||
83 | spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); | ||
84 | while (!list_empty(&rpcrdma_tasklets_g)) { | ||
85 | rep = list_entry(rpcrdma_tasklets_g.next, | ||
86 | struct rpcrdma_rep, rr_list); | ||
87 | list_del(&rep->rr_list); | ||
88 | func = rep->rr_func; | ||
89 | rep->rr_func = NULL; | ||
90 | spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); | ||
91 | |||
92 | if (func) | ||
93 | func(rep); | ||
94 | else | ||
95 | rpcrdma_recv_buffer_put(rep); | ||
96 | |||
97 | spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); | ||
98 | } | ||
99 | spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); | ||
100 | } | ||
101 | |||
102 | static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); | ||
103 | |||
104 | static inline void | ||
105 | rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) | ||
106 | { | ||
107 | unsigned long flags; | ||
108 | |||
109 | spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); | ||
110 | list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); | ||
111 | spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); | ||
112 | tasklet_schedule(&rpcrdma_tasklet_g); | ||
113 | } | ||
114 | |||
115 | static void | ||
116 | rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) | ||
117 | { | ||
118 | struct rpcrdma_ep *ep = context; | ||
119 | |||
120 | dprintk("RPC: %s: QP error %X on device %s ep %p\n", | ||
121 | __func__, event->event, event->device->name, context); | ||
122 | if (ep->rep_connected == 1) { | ||
123 | ep->rep_connected = -EIO; | ||
124 | ep->rep_func(ep); | ||
125 | wake_up_all(&ep->rep_connect_wait); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static void | ||
130 | rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) | ||
131 | { | ||
132 | struct rpcrdma_ep *ep = context; | ||
133 | |||
134 | dprintk("RPC: %s: CQ error %X on device %s ep %p\n", | ||
135 | __func__, event->event, event->device->name, context); | ||
136 | if (ep->rep_connected == 1) { | ||
137 | ep->rep_connected = -EIO; | ||
138 | ep->rep_func(ep); | ||
139 | wake_up_all(&ep->rep_connect_wait); | ||
140 | } | ||
141 | } | ||
142 | |||
/*
 * rpcrdma_event_process - dispatch one work completion.
 *
 * The posted wr_id carries the rpcrdma_rep pointer (zero for sends and
 * binds we ignore).  On error the reply length is poisoned to ~0U and
 * the reply is still handed to the tasklet so the waiter is released.
 * On a successful receive, the server's credit count is harvested from
 * the inline RPC/RDMA header before scheduling the reply tasklet.
 */
static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send or bind completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC: %s: %s WC status %X, connection lost\n",
			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
			wc->status);
		rep->rr_len = ~0U;	/* poison length to signal failure */
		rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep (only) the most recent credits, after check validity */
		/* 16 bytes covers the fixed header up to rm_credit */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
					(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);
			if (credits == 0) {
				dprintk("RPC: %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC: %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		/* fall through */
	case IB_WC_BIND_MW:
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC: %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}
199 | |||
200 | static inline int | ||
201 | rpcrdma_cq_poll(struct ib_cq *cq) | ||
202 | { | ||
203 | struct ib_wc wc; | ||
204 | int rc; | ||
205 | |||
206 | for (;;) { | ||
207 | rc = ib_poll_cq(cq, 1, &wc); | ||
208 | if (rc < 0) { | ||
209 | dprintk("RPC: %s: ib_poll_cq failed %i\n", | ||
210 | __func__, rc); | ||
211 | return rc; | ||
212 | } | ||
213 | if (rc == 0) | ||
214 | break; | ||
215 | |||
216 | rpcrdma_event_process(&wc); | ||
217 | } | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * rpcrdma_cq_event_upcall | ||
224 | * | ||
225 | * This upcall handles recv, send, bind and unbind events. | ||
226 | * It is reentrant but processes single events in order to maintain | ||
227 | * ordering of receives to keep server credits. | ||
228 | * | ||
229 | * It is the responsibility of the scheduled tasklet to return | ||
230 | * recv buffers to the pool. NOTE: this affects synchronization of | ||
231 | * connection shutdown. That is, the structures required for | ||
232 | * the completion of the reply handler must remain intact until | ||
233 | * all memory has been reclaimed. | ||
234 | * | ||
235 | * Note that send events are suppressed and do not result in an upcall. | ||
236 | */ | ||
237 | static void | ||
238 | rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) | ||
239 | { | ||
240 | int rc; | ||
241 | |||
242 | rc = rpcrdma_cq_poll(cq); | ||
243 | if (rc) | ||
244 | return; | ||
245 | |||
246 | rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | ||
247 | if (rc) { | ||
248 | dprintk("RPC: %s: ib_req_notify_cq failed %i\n", | ||
249 | __func__, rc); | ||
250 | return; | ||
251 | } | ||
252 | |||
253 | rpcrdma_cq_poll(cq); | ||
254 | } | ||
255 | |||
#ifdef RPC_DEBUG
/* Human-readable names indexed by the RDMA_CM_EVENT_* codes used in
 * rpcrdma_conn_upcall()'s connection-state dprintk.  Keep in sync with
 * enum rdma_cm_event_type. */
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif
272 | |||
/*
 * rpcrdma_conn_upcall - RDMA connection-manager event handler.
 *
 * Address/route resolution results set ri_async_rc and complete
 * ia->ri_done for the waiter in rpcrdma_create_id().  Connection
 * state changes funnel through the "connected" label: the endpoint's
 * rep_connected is set (1 for established, negative errno otherwise),
 * its notifier rep_func is invoked, and connect waiters are woken.
 *
 * Change from the original: the dprintk bound on the conn[] name table
 * now uses ARRAY_SIZE(conn) instead of the magic constant 11, so the
 * check cannot silently go stale if the table grows.
 *
 * Always returns 0 (no event is rejected back to the CM).
 */
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
			" (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event < ARRAY_SIZE(conn)) ? conn[event->event] :
				"unknown connection error",
			NIPQUAD(addr->sin_addr.s_addr),
			ntohs(addr->sin_port),
			ep, event->event);
		/* reset flow control until the server re-advertises credits */
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		ia->ri_async_rc = -EINVAL;
		dprintk("RPC: %s: unexpected CM event %X\n",
			__func__, event->event);
		complete(&ia->ri_done);
		break;
	}

	return 0;
}
350 | |||
/*
 * rpcrdma_create_id - create a CM id and synchronously resolve the peer.
 *
 * rdma_resolve_addr()/rdma_resolve_route() complete asynchronously via
 * rpcrdma_conn_upcall(), which stores the outcome in ia->ri_async_rc and
 * signals ia->ri_done; this function blocks on that completion after each
 * step.  Returns the connected-ready cm_id, or an ERR_PTR on failure
 * (the id is destroyed on the error path).
 */
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	/* clear before each async step; upcall writes the real outcome */
	ia->ri_async_rc = 0;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion(&ia->ri_done);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = 0;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion(&ia->ri_done);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
396 | |||
397 | /* | ||
398 | * Drain any cq, prior to teardown. | ||
399 | */ | ||
400 | static void | ||
401 | rpcrdma_clean_cq(struct ib_cq *cq) | ||
402 | { | ||
403 | struct ib_wc wc; | ||
404 | int count = 0; | ||
405 | |||
406 | while (1 == ib_poll_cq(cq, 1, &wc)) | ||
407 | ++count; | ||
408 | |||
409 | if (count) | ||
410 | dprintk("RPC: %s: flushed %d events (last 0x%x)\n", | ||
411 | __func__, count, wc.opcode); | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Exported functions. | ||
416 | */ | ||
417 | |||
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 *
 * @memreg selects the memory-registration strategy (RPCRDMA_* constant);
 * strategies above RPCRDMA_REGISTER additionally take out a DMA MR whose
 * access rights depend on the strategy.  On MR failure the code degrades
 * to RPCRDMA_REGISTER rather than failing the mount.
 *
 * Returns 0 on success or a negative errno; on failure the cm_id is
 * destroyed before returning.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	init_completion(&ia->ri_done);

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	if (memreg > RPCRDMA_REGISTER) {
		int mem_priv = IB_ACCESS_LOCAL_WRITE;
		switch (memreg) {
#if RPCRDMA_PERSISTENT_REGISTRATION
		case RPCRDMA_ALLPHYSICAL:
			/* persistent registration exposes all of memory for
			 * remote read/write — fastest but least protected */
			mem_priv |= IB_ACCESS_REMOTE_WRITE;
			mem_priv |= IB_ACCESS_REMOTE_READ;
			break;
#endif
		case RPCRDMA_MEMWINDOWS_ASYNC:
		case RPCRDMA_MEMWINDOWS:
			mem_priv |= IB_ACCESS_MW_BIND;
			break;
		default:
			break;
		}
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			/* fall back to per-chunk registration */
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
	}

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
out1:
	return rc;
}
489 | |||
/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 *
 * Teardown order matters: MR, then QP, then PD, then the cm_id last
 * (the QP and PD hang off the id's device).  Each step tolerates the
 * corresponding field never having been set up.
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
		rdma_destroy_qp(ia->ri_id);
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
		rdma_destroy_id(ia->ri_id);
}
516 | |||
/*
 * Create unconnected endpoint.
 *
 * Sizes the QP attributes against the device limits reported by
 * ib_query_device(), creates a single CQ shared by sends and receives,
 * and fills in the CM connection parameters.  Returns 0 or a negative
 * errno; the CQ is destroyed on the notify-failure path.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	int rc;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		/* Add room for mw_binds+unbinds - overkill! */
		ep->rep_attr.cap.max_send_wr++;
		ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
			return -EINVAL;
		break;
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	/* extra send SGEs make room for the inline write padding segment */
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /*  - 1*/;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
		break;
	default:
		break;
	}
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;	/* queue too small to batch signals */
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);

	/*
	 * Create a single cq for receive dto and mw_bind (only ever
	 * care about unbind, really). Send completions are suppressed.
	 * Use single threaded tasklet upcalls to maintain ordering.
	 */
	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
				  rpcrdma_cq_async_error_upcall, NULL,
				  ep->rep_attr.cap.max_recv_wr +
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(ep->rep_cq)) {
		rc = PTR_ERR(ep->rep_cq);
		dprintk("RPC: %s: ib_create_cq failed: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = ep->rep_cq;
	ep->rep_attr.recv_cq = ep->rep_cq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_BOUNCEBUFFERS:
		ep->rep_remote_cma.responder_resources = 0;
		break;
	case RPCRDMA_MTHCAFMR:
	case RPCRDMA_REGISTER:
		ep->rep_remote_cma.responder_resources = cdata->max_requests *
				(RPCRDMA_MAX_DATA_SEGS / 8);
		break;
	case RPCRDMA_MEMWINDOWS:
	case RPCRDMA_MEMWINDOWS_ASYNC:
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
#endif
		ep->rep_remote_cma.responder_resources = cdata->max_requests *
				(RPCRDMA_MAX_DATA_SEGS / 2);
		break;
	default:
		break;
	}
	if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
	ep->rep_remote_cma.initiator_depth = 0;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	/* best-effort teardown; the original rc is what callers need */
	if (ib_destroy_cq(ep->rep_cq))
		;
out1:
	return rc;
}
656 | |||
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 *
 * The caller's error handling must be sure to not leak the endpoint
 * if this function fails.
 *
 * Returns the status of ib_destroy_cq() (earlier disconnect/deregister
 * failures are logged but do not abort the teardown).
 */
int
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	/* If a QP still exists, a connection may too: disconnect first.
	 * Failure here is logged and otherwise ignored. */
	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
	}

	/* Prevent any further connection-event upcall processing. */
	ep->rep_func = NULL;

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	/* Destroy the QP before the CQ it is attached to. */
	if (ia->ri_id->qp) {
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* Reap any completions still queued, then release the CQ. */
	rpcrdma_clean_cq(ep->rep_cq);
	rc = ib_destroy_cq(ep->rep_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	return rc;
}
703 | |||
/*
 * Connect unconnected endpoint.
 *
 * On first connect (ep->rep_connected == 0 on entry) this blocks until
 * the CM upcall reports success or failure, retrying a refused connect
 * up to RDMA_CONNECT_RETRY_MAX times. On reconnect it tears down the
 * old cm_id, builds a new one, initiates the connect and returns
 * without waiting.
 *
 * Returns 0 on success, or a negative errno; on failure the error is
 * also latched into ep->rep_connected.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;
	/* Nonzero rep_connected means this endpoint was connected (or
	 * failed) before, i.e. this is a reconnect. */
	int reconnect = (ep->rep_connected != 0);

	if (reconnect) {
		struct rpcrdma_xprt *xprt;
retry:
		/* NOTE: "goto retry" below jumps into this block from the
		 * initial-connect path as well, deliberately reusing the
		 * reconnect teardown/re-create sequence. */
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		/* Create a fresh cm_id for the same server address. */
		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n",
			__func__, rc);
		goto out;
	}

	/* XXX Tavor device performs badly with 2K MTU! */
	/* NOTE(review): the ib_modify_qp() return value below is
	 * effectively ignored (rc is overwritten by rdma_connect()),
	 * making this MTU clamp best-effort. strnicmp() is also a
	 * deprecated interface — confirm against current kernel APIs. */
	if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
		struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
		if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
		    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
		     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
			struct ib_qp_attr attr = {
				.path_mtu = IB_MTU_1024
			};
			rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
		}
	}

	/* Theoretically a client initiator_depth > 0 is not needed,
	 * but many peers fail to complete the connection unless they
	 * == responder_resources! */
	if (ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)
		ep->rep_remote_cma.initiator_depth =
			ep->rep_remote_cma.responder_resources;

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	/* Reconnects return immediately; the CM upcall finishes the job. */
	if (reconnect)
		return 0;

	/* Initial connect: wait for the connection upcall to set
	 * rep_connected to 1 (success) or a negative errno. */
	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED
	    && ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		ep->rep_remote_cma.initiator_depth =
			ep->rep_remote_cma.responder_resources;
		if (ep->rep_remote_cma.initiator_depth == 0)
			++ep->rep_remote_cma.initiator_depth;
		if (ep->rep_remote_cma.responder_resources == 0)
			++ep->rep_remote_cma.responder_resources;
		/* Retry once with the adjusted ORD/IRD. */
		if (retry_count++ == 0)
			goto retry;
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}
824 | |||
/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 *
 * Returns the status of rdma_disconnect(); on success it waits
 * (interruptibly) for the CM upcall to move rep_connected off 1.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	/* Reap completions still pending on the CQ before tearing down. */
	rpcrdma_clean_cq(ep->rep_cq);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		/* Latch the failure so callers see the endpoint as broken. */
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}
853 | |||
/*
 * Initialize buffer memory
 *
 * Allocates and registers the transport's pools: the send/recv pointer
 * arrays, the zeroed pad buffer, the fmr/mw pool (strategy-dependent),
 * and one registered request + reply buffer per max_requests slot.
 *
 * Returns 0 on success or a negative errno. On failure the partially
 * built state is torn down via rpcrdma_buffer_destroy(), which must
 * therefore tolerate partial initialization.
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	/* Start with a single RPC credit until the server grants more. */
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1. arrays for send and recv pointers
	 *   2. arrays of struct rpcrdma_req to fill in pointers
	 *   3. array of struct rpcrdma_rep for replies
	 *   4. padding, if any
	 *   5. mw's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	/* Carve the pool: send pointer array, then recv pointer array,
	 * then (below) the pad buffer, then the mw structs. */
	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	/*
	 * Allocate the fmr's, or mw's for mw_bind chunk registration.
	 * We "cycle" the mw's in order to minimize rkey reuse,
	 * and also reduce unbind-to-bind collision.
	 */
	INIT_LIST_HEAD(&buf->rb_mws);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_MTHCAFMR:
		{
		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
		struct ib_fmr_attr fa = {
			RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
		};
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		}
		break;
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		{
		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
		/* Allocate one extra request's worth, for full cycling */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.mw = ib_alloc_mw(ia->ri_pd);
			if (IS_ERR(r->r.mw)) {
				rc = PTR_ERR(r->r.mw);
				dprintk("RPC: %s: ib_alloc_mw"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
		/* Typical ~2400b, so rounding up saves work later */
		if (len < 4096)
			len = 4096;
		req = kmalloc(len, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		/* Only the struct header needs zeroing; the trailing
		 * inline send area is overwritten before use. */
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				len - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);

		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
		rep = kmalloc(len, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;
		init_waitqueue_head(&rep->rr_unbind);

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				len - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	/* buffer_destroy copes with whatever subset was initialized */
	rpcrdma_buffer_destroy(buf);
	return rc;
}
1035 | |||
1036 | /* | ||
1037 | * Unregister and destroy buffer memory. Need to deal with | ||
1038 | * partial initialization, so it's callable from failed create. | ||
1039 | * Must be called before destroying endpoint, as registrations | ||
1040 | * reference it. | ||
1041 | */ | ||
1042 | void | ||
1043 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | ||
1044 | { | ||
1045 | int rc, i; | ||
1046 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | ||
1047 | |||
1048 | /* clean up in reverse order from create | ||
1049 | * 1. recv mr memory (mr free, then kfree) | ||
1050 | * 1a. bind mw memory | ||
1051 | * 2. send mr memory (mr free, then kfree) | ||
1052 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] | ||
1053 | * 4. arrays | ||
1054 | */ | ||
1055 | dprintk("RPC: %s: entering\n", __func__); | ||
1056 | |||
1057 | for (i = 0; i < buf->rb_max_requests; i++) { | ||
1058 | if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { | ||
1059 | rpcrdma_deregister_internal(ia, | ||
1060 | buf->rb_recv_bufs[i]->rr_handle, | ||
1061 | &buf->rb_recv_bufs[i]->rr_iov); | ||
1062 | kfree(buf->rb_recv_bufs[i]); | ||
1063 | } | ||
1064 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | ||
1065 | while (!list_empty(&buf->rb_mws)) { | ||
1066 | struct rpcrdma_mw *r; | ||
1067 | r = list_entry(buf->rb_mws.next, | ||
1068 | struct rpcrdma_mw, mw_list); | ||
1069 | list_del(&r->mw_list); | ||
1070 | switch (ia->ri_memreg_strategy) { | ||
1071 | case RPCRDMA_MTHCAFMR: | ||
1072 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1073 | if (rc) | ||
1074 | dprintk("RPC: %s:" | ||
1075 | " ib_dealloc_fmr" | ||
1076 | " failed %i\n", | ||
1077 | __func__, rc); | ||
1078 | break; | ||
1079 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1080 | case RPCRDMA_MEMWINDOWS: | ||
1081 | rc = ib_dealloc_mw(r->r.mw); | ||
1082 | if (rc) | ||
1083 | dprintk("RPC: %s:" | ||
1084 | " ib_dealloc_mw" | ||
1085 | " failed %i\n", | ||
1086 | __func__, rc); | ||
1087 | break; | ||
1088 | default: | ||
1089 | break; | ||
1090 | } | ||
1091 | } | ||
1092 | rpcrdma_deregister_internal(ia, | ||
1093 | buf->rb_send_bufs[i]->rl_handle, | ||
1094 | &buf->rb_send_bufs[i]->rl_iov); | ||
1095 | kfree(buf->rb_send_bufs[i]); | ||
1096 | } | ||
1097 | } | ||
1098 | |||
1099 | kfree(buf->rb_pool); | ||
1100 | } | ||
1101 | |||
/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 *
 * Returns a request with rl_reply attached (or NULL rl_reply if the
 * matching receive is already posted), or NULL if the pool is empty.
 * Runs under rb_lock with interrupts disabled.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		/* More receives than sends are outstanding: this request
		 * goes out without a fresh reply buffer of its own. */
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
	/* Hand the request a full complement of mws, filled from the top
	 * of rl_segments down. Assumes the pool holds at least
	 * RPCRDMA_MAX_SEGS entries whenever it is non-empty (create
	 * allocates (max_requests+1)*RPCRDMA_MAX_SEGS) — TODO confirm. */
	if (!list_empty(&buffers->rb_mws)) {
		int i = RPCRDMA_MAX_SEGS - 1;
		do {
			struct rpcrdma_mw *r;
			r = list_entry(buffers->rb_mws.next,
					struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);
			req->rl_segments[i].mr_chunk.rl_mw = r;
		} while (--i >= 0);
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	return req;
}
1148 | |||
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 *
 * Returns the request (and any still-attached reply) to the pool and,
 * for fmr/mw strategies, recycles the request's mws back onto rb_mws.
 * The request must have no registered chunks left (BUG otherwise).
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	int i;
	unsigned long flags;

	/* All external registrations must be torn down before return. */
	BUG_ON(req->rl_nchunks != 0);
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		/* Return the attached reply buffer and reset its state. */
		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
		init_waitqueue_head(&req->rl_reply->rr_unbind);
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_MTHCAFMR:
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		/*
		 * Cycle mw's back in reverse order, and "spin" them.
		 * This delays and scrambles reuse as much as possible.
		 * Segments 1..MAX-1 are appended first, then segment 0
		 * last, rotating the list relative to buffer_get()'s
		 * top-down fill order.
		 */
		i = 1;
		do {
			struct rpcrdma_mw **mw;
			mw = &req->rl_segments[i].mr_chunk.rl_mw;
			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
			*mw = NULL;
		} while (++i < RPCRDMA_MAX_SEGS);
		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
					&buffers->rb_mws);
		req->rl_segments[0].mr_chunk.rl_mw = NULL;
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
1195 | |||
/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 *
 * Attaches a fresh reply buffer to @req if one is available; silently
 * does nothing when the recv pool is exhausted.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	/* A zero-length rl_iov marks a special oversized request from
	 * xprt_rdma_allocate(); its rl_buffer presumably points at the
	 * real pooled request rather than the buffer pool itself, so
	 * follow one more level of indirection — TODO confirm against
	 * xprt_rdma_allocate(). */
	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
1216 | |||
1217 | /* | ||
1218 | * Put reply buffers back into pool when not attached to | ||
1219 | * request. This happens in error conditions, and when | ||
1220 | * aborting unbinds. Pre-decrement counter/array index. | ||
1221 | */ | ||
1222 | void | ||
1223 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | ||
1224 | { | ||
1225 | struct rpcrdma_buffer *buffers = rep->rr_buffer; | ||
1226 | unsigned long flags; | ||
1227 | |||
1228 | rep->rr_func = NULL; | ||
1229 | spin_lock_irqsave(&buffers->rb_lock, flags); | ||
1230 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; | ||
1231 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | ||
1232 | } | ||
1233 | |||
/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

/*
 * DMA-map @va/@len and fill in @iov for use as a local SGE. When the
 * adapter has an all-memory lkey (ri_bind_mem), no MR is created and
 * *mrp is NULL; otherwise a phys MR is registered and returned via
 * *mrp for rpcrdma_deregister_internal() to release later.
 *
 * Returns 0 on success or a negative errno from ib_reg_phys_mr().
 *
 * NOTE(review): the ib_dma_map_single() result is not checked with
 * ib_dma_mapping_error(), and on ib_reg_phys_mr() failure the DMA
 * mapping is not undone here — verify callers unwind via
 * rpcrdma_deregister_internal() or tolerate the leak.
 */
int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	iov->length = len;

	if (ia->ri_bind_mem != NULL) {
		/* Fast path: reuse the adapter-wide lkey, no MR needed. */
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, ipb.addr, iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}
1280 | |||
1281 | int | ||
1282 | rpcrdma_deregister_internal(struct rpcrdma_ia *ia, | ||
1283 | struct ib_mr *mr, struct ib_sge *iov) | ||
1284 | { | ||
1285 | int rc; | ||
1286 | |||
1287 | ib_dma_unmap_single(ia->ri_id->device, | ||
1288 | iov->addr, iov->length, DMA_BIDIRECTIONAL); | ||
1289 | |||
1290 | if (NULL == mr) | ||
1291 | return 0; | ||
1292 | |||
1293 | rc = ib_dereg_mr(mr); | ||
1294 | if (rc) | ||
1295 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc); | ||
1296 | return rc; | ||
1297 | } | ||
1298 | |||
1299 | /* | ||
1300 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
1301 | */ | ||
1302 | |||
1303 | static void | ||
1304 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
1305 | { | ||
1306 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
1307 | seg->mr_dmalen = seg->mr_len; | ||
1308 | if (seg->mr_page) | ||
1309 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
1310 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
1311 | seg->mr_dmalen, seg->mr_dir); | ||
1312 | else | ||
1313 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
1314 | seg->mr_offset, | ||
1315 | seg->mr_dmalen, seg->mr_dir); | ||
1316 | } | ||
1317 | |||
1318 | static void | ||
1319 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
1320 | { | ||
1321 | if (seg->mr_page) | ||
1322 | ib_dma_unmap_page(ia->ri_id->device, | ||
1323 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1324 | else | ||
1325 | ib_dma_unmap_single(ia->ri_id->device, | ||
1326 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1327 | } | ||
1328 | |||
/*
 * rpcrdma_register_external - register chunk memory for remote access
 * @seg: first of up to @nsegs contiguous candidate segments
 * @nsegs: number of candidate segments
 * @writing: nonzero if the peer will RDMA Write into this memory
 * @r_xprt: transport (supplies the ia, and the ep for mw binds)
 *
 * Registers as many leading segments as the strategy allows; a page
 * "hole" (segment not flush against a page boundary) terminates
 * coalescing for the fmr and phys-mr strategies. On success, the
 * registration (rkey/base/len/nsegs) is recorded in the first segment.
 *
 * Returns the number of segments actually covered, or -1 on failure
 * (callers test for < 0; the precise errno is only logged).
 */
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
				  IB_ACCESS_REMOTE_READ);
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		/* Everything is pre-registered: just DMA-map and expose
		 * the adapter-wide rkey. One segment at a time. */
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;
#endif

	/* Registration using fast memory registration */
	case RPCRDMA_MTHCAFMR:
		{
		u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
		int len, pageoff = offset_in_page(seg->mr_offset);
		/* FMRs are page-granular: back the first segment up to
		 * its page boundary and account for the offset. */
		seg1->mr_offset -= pageoff;	/* start of page */
		seg1->mr_len += pageoff;
		len = -pageoff;
		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
			nsegs = RPCRDMA_MAX_DATA_SEGS;
		for (i = 0; i < nsegs;) {
			rpcrdma_map_one(ia, seg, writing);
			physaddrs[i] = seg->mr_dma;
			len += seg->mr_len;
			++seg;
			++i;
			/* Check for holes */
			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
				break;
		}
		nsegs = i;
		rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
					physaddrs, nsegs, seg1->mr_dma);
		if (rc) {
			dprintk("RPC: %s: failed ib_map_phys_fmr "
				"%u@0x%llx+%i (%d)... status %i\n", __func__,
				len, (unsigned long long)seg1->mr_dma,
				pageoff, nsegs, rc);
			/* Unwind the DMA mappings made above. */
			while (nsegs--)
				rpcrdma_unmap_one(ia, --seg);
		} else {
			seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
			seg1->mr_base = seg1->mr_dma + pageoff;
			seg1->mr_nsegs = nsegs;
			seg1->mr_len = len;
		}
		}
		break;

	/* Registration using memory windows */
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		{
		struct ib_mw_bind param;
		/* A bind covers exactly one segment; the bind WR is
		 * unsignaled and consumes a CQ credit. */
		rpcrdma_map_one(ia, seg, writing);
		param.mr = ia->ri_bind_mem;
		param.wr_id = 0ULL;	/* no send cookie */
		param.addr = seg->mr_dma;
		param.length = seg->mr_len;
		param.send_flags = 0;
		param.mw_access_flags = mem_priv;

		DECR_CQCOUNT(&r_xprt->rx_ep);
		rc = ib_bind_mw(ia->ri_id->qp,
					seg->mr_chunk.rl_mw->r.mw, &param);
		if (rc) {
			dprintk("RPC: %s: failed ib_bind_mw "
				"%u@0x%llx status %i\n",
				__func__, seg->mr_len,
				(unsigned long long)seg->mr_dma, rc);
			rpcrdma_unmap_one(ia, seg);
		} else {
			seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
			seg->mr_base = param.addr;
			seg->mr_nsegs = 1;
			nsegs = 1;
		}
		}
		break;

	/* Default registration each time */
	default:
		{
		struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
		int len = 0;
		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
			nsegs = RPCRDMA_MAX_DATA_SEGS;
		for (i = 0; i < nsegs;) {
			rpcrdma_map_one(ia, seg, writing);
			ipb[i].addr = seg->mr_dma;
			ipb[i].size = seg->mr_len;
			len += seg->mr_len;
			++seg;
			++i;
			/* Check for holes */
			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
				break;
		}
		nsegs = i;
		/* ib_reg_phys_mr may adjust mr_base via its iova_start
		 * in/out argument. */
		seg1->mr_base = seg1->mr_dma;
		seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
					ipb, nsegs, mem_priv, &seg1->mr_base);
		if (IS_ERR(seg1->mr_chunk.rl_mr)) {
			rc = PTR_ERR(seg1->mr_chunk.rl_mr);
			dprintk("RPC: %s: failed ib_reg_phys_mr "
				"%u@0x%llx (%d)... status %i\n",
				__func__, len,
				(unsigned long long)seg1->mr_dma, nsegs, rc);
			while (nsegs--)
				rpcrdma_unmap_one(ia, --seg);
		} else {
			seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
			seg1->mr_nsegs = nsegs;
			seg1->mr_len = len;
		}
		}
		break;
	}
	if (rc)
		return -1;

	return nsegs;
}
1467 | |||
/*
 * rpcrdma_deregister_external - undo rpcrdma_register_external()
 * @seg: first segment of the registration to tear down
 * @r_xprt: transport (supplies the ia and ep)
 * @r: optional rpcrdma_rep whose rr_func should run once the unbind
 *     completes; for the MEMWINDOWS strategies the unbind is posted
 *     signaled and the callback is deferred to the send completion
 *     upcall, otherwise the callback is invoked synchronously here.
 *
 * Returns the number of segments that were covered by the registration.
 */
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt, void *r)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg1 = seg;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		BUG_ON(nsegs != 1);
		rpcrdma_unmap_one(ia, seg);
		rc = 0;
		break;
#endif

	case RPCRDMA_MTHCAFMR:
		{
		/* Unmap the FMR first, then release the per-segment DMA
		 * mappings that register_external() created. */
		LIST_HEAD(l);
		list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		while (seg1->mr_nsegs--)
			rpcrdma_unmap_one(ia, seg++);
		}
		if (rc)
			dprintk("RPC: %s: failed ib_unmap_fmr,"
				" status %i\n", __func__, rc);
		break;

	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		{
		/* An unbind is a bind to a zero-length range. */
		struct ib_mw_bind param;
		BUG_ON(nsegs != 1);
		param.mr = ia->ri_bind_mem;
		param.addr = 0ULL;	/* unbind */
		param.length = 0;
		param.mw_access_flags = 0;
		if (r) {
			/* Signaled unbind: the completion upcall will
			 * find @r via wr_id and run its callback. */
			param.wr_id = (u64) (unsigned long) r;
			param.send_flags = IB_SEND_SIGNALED;
			INIT_CQCOUNT(&r_xprt->rx_ep);
		} else {
			param.wr_id = 0ULL;
			param.send_flags = 0;
			DECR_CQCOUNT(&r_xprt->rx_ep);
		}
		rc = ib_bind_mw(ia->ri_id->qp,
				seg->mr_chunk.rl_mw->r.mw, &param);
		rpcrdma_unmap_one(ia, seg);
		}
		if (rc)
			dprintk("RPC: %s: failed ib_(un)bind_mw,"
				" status %i\n", __func__, rc);
		else
			/* Ownership of the callback passes to the
			 * completion path; don't invoke it below. */
			r = NULL;	/* will upcall on completion */
		break;

	default:
		rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
		seg1->mr_chunk.rl_mr = NULL;
		while (seg1->mr_nsegs--)
			rpcrdma_unmap_one(ia, seg++);
		if (rc)
			dprintk("RPC: %s: failed ib_dereg_mr,"
				" status %i\n", __func__, rc);
		break;
	}
	/* Synchronous-completion case: run the caller's callback now. */
	if (r) {
		struct rpcrdma_rep *rep = r;
		void (*func)(struct rpcrdma_rep *) = rep->rr_func;
		rep->rr_func = NULL;
		func(rep);	/* dereg done, callback now */
	}
	return nsegs;
}
1546 | |||
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 *
 * Sends are normally unsignaled; a signaled send is requested every
 * time the endpoint's CQ credit counter runs out, so the provider can
 * retire completed WRs.
 *
 * Returns 0 on success or the ib_post_recv()/ib_post_send() status.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	/* Make the reply buffer available to hardware before the
	 * request goes out, so the response always has a landing spot. */
	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	send_wr.imm_data = 0;
	/* Sync the iovs for device access: header (0), RPC body (1),
	 * and the trailing pad iov (3) when 4 sges are in use.
	 * NOTE(review): iov[2] is never synced here — presumably it
	 * references the separately registered pad/chunk region that
	 * needs no sync; confirm against the marshaling code. */
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}
1599 | |||
1600 | /* | ||
1601 | * (Re)post a receive buffer. | ||
1602 | */ | ||
1603 | int | ||
1604 | rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | ||
1605 | struct rpcrdma_ep *ep, | ||
1606 | struct rpcrdma_rep *rep) | ||
1607 | { | ||
1608 | struct ib_recv_wr recv_wr, *recv_wr_fail; | ||
1609 | int rc; | ||
1610 | |||
1611 | recv_wr.next = NULL; | ||
1612 | recv_wr.wr_id = (u64) (unsigned long) rep; | ||
1613 | recv_wr.sg_list = &rep->rr_iov; | ||
1614 | recv_wr.num_sge = 1; | ||
1615 | |||
1616 | ib_dma_sync_single_for_cpu(ia->ri_id->device, | ||
1617 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); | ||
1618 | |||
1619 | DECR_CQCOUNT(ep); | ||
1620 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); | ||
1621 | |||
1622 | if (rc) | ||
1623 | dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, | ||
1624 | rc); | ||
1625 | return rc; | ||
1626 | } | ||
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h new file mode 100644 index 000000000000..2427822f8bd4 --- /dev/null +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #ifndef _LINUX_SUNRPC_XPRT_RDMA_H | ||
41 | #define _LINUX_SUNRPC_XPRT_RDMA_H | ||
42 | |||
43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | ||
44 | #include <linux/spinlock.h> /* spinlock_t, etc */ | ||
45 | #include <asm/atomic.h> /* atomic_t, etc */ | ||
46 | |||
47 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | ||
48 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | ||
49 | |||
50 | #include <linux/sunrpc/clnt.h> /* rpc_xprt */ | ||
51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | ||
52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | ||
53 | |||
54 | /* | ||
55 | * Interface Adapter -- one per transport instance | ||
56 | */ | ||
57 | struct rpcrdma_ia { | ||
58 | struct rdma_cm_id *ri_id; | ||
59 | struct ib_pd *ri_pd; | ||
60 | struct ib_mr *ri_bind_mem; | ||
61 | struct completion ri_done; | ||
62 | int ri_async_rc; | ||
63 | enum rpcrdma_memreg ri_memreg_strategy; | ||
64 | }; | ||
65 | |||
66 | /* | ||
67 | * RDMA Endpoint -- one per transport instance | ||
68 | */ | ||
69 | |||
70 | struct rpcrdma_ep { | ||
71 | atomic_t rep_cqcount; | ||
72 | int rep_cqinit; | ||
73 | int rep_connected; | ||
74 | struct rpcrdma_ia *rep_ia; | ||
75 | struct ib_cq *rep_cq; | ||
76 | struct ib_qp_init_attr rep_attr; | ||
77 | wait_queue_head_t rep_connect_wait; | ||
78 | struct ib_sge rep_pad; /* holds zeroed pad */ | ||
79 | struct ib_mr *rep_pad_mr; /* holds zeroed pad */ | ||
80 | void (*rep_func)(struct rpcrdma_ep *); | ||
81 | struct rpc_xprt *rep_xprt; /* for rep_func */ | ||
82 | struct rdma_conn_param rep_remote_cma; | ||
83 | struct sockaddr_storage rep_remote_addr; | ||
84 | }; | ||
85 | |||
86 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | ||
87 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | ||
88 | |||
89 | /* | ||
90 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | ||
91 | * and complete a reply, asychronously. It needs several pieces of | ||
92 | * state: | ||
93 | * o recv buffer (posted to provider) | ||
94 | * o ib_sge (also donated to provider) | ||
95 | * o status of reply (length, success or not) | ||
96 | * o bookkeeping state to get run by tasklet (list, etc) | ||
97 | * | ||
98 | * These are allocated during initialization, per-transport instance; | ||
99 | * however, the tasklet execution list itself is global, as it should | ||
100 | * always be pretty short. | ||
101 | * | ||
102 | * N of these are associated with a transport instance, and stored in | ||
103 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | ||
104 | */ | ||
105 | |||
106 | /* temporary static scatter/gather max */ | ||
107 | #define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ | ||
108 | #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ | ||
109 | #define MAX_RPCRDMAHDR (\ | ||
110 | /* max supported RPC/RDMA header */ \ | ||
111 | sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ | ||
112 | (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) | ||
113 | |||
114 | struct rpcrdma_buffer; | ||
115 | |||
116 | struct rpcrdma_rep { | ||
117 | unsigned int rr_len; /* actual received reply length */ | ||
118 | struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ | ||
119 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ | ||
120 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ | ||
121 | struct list_head rr_list; /* tasklet list */ | ||
122 | wait_queue_head_t rr_unbind; /* optional unbind wait */ | ||
123 | struct ib_sge rr_iov; /* for posting */ | ||
124 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ | ||
125 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ | ||
126 | }; | ||
127 | |||
128 | /* | ||
129 | * struct rpcrdma_req -- structure central to the request/reply sequence. | ||
130 | * | ||
131 | * N of these are associated with a transport instance, and stored in | ||
132 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | ||
133 | * | ||
134 | * It includes pre-registered buffer memory for send AND recv. | ||
135 | * The recv buffer, however, is not owned by this structure, and | ||
136 | * is "donated" to the hardware when a recv is posted. When a | ||
137 | * reply is handled, the recv buffer used is given back to the | ||
138 | * struct rpcrdma_req associated with the request. | ||
139 | * | ||
140 | * In addition to the basic memory, this structure includes an array | ||
141 | * of iovs for send operations. The reason is that the iovs passed to | ||
142 | * ib_post_{send,recv} must not be modified until the work request | ||
143 | * completes. | ||
144 | * | ||
145 | * NOTES: | ||
146 | * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we | ||
147 | * marshal. The number needed varies depending on the iov lists that | ||
148 | * are passed to us, the memory registration mode we are in, and if | ||
149 | * physical addressing is used, the layout. | ||
150 | */ | ||
151 | |||
152 | struct rpcrdma_mr_seg { /* chunk descriptors */ | ||
153 | union { /* chunk memory handles */ | ||
154 | struct ib_mr *rl_mr; /* if registered directly */ | ||
155 | struct rpcrdma_mw { /* if registered from region */ | ||
156 | union { | ||
157 | struct ib_mw *mw; | ||
158 | struct ib_fmr *fmr; | ||
159 | } r; | ||
160 | struct list_head mw_list; | ||
161 | } *rl_mw; | ||
162 | } mr_chunk; | ||
163 | u64 mr_base; /* registration result */ | ||
164 | u32 mr_rkey; /* registration result */ | ||
165 | u32 mr_len; /* length of chunk or segment */ | ||
166 | int mr_nsegs; /* number of segments in chunk or 0 */ | ||
167 | enum dma_data_direction mr_dir; /* segment mapping direction */ | ||
168 | dma_addr_t mr_dma; /* segment mapping address */ | ||
169 | size_t mr_dmalen; /* segment mapping length */ | ||
170 | struct page *mr_page; /* owning page, if any */ | ||
171 | char *mr_offset; /* kva if no page, else offset */ | ||
172 | }; | ||
173 | |||
174 | struct rpcrdma_req { | ||
175 | size_t rl_size; /* actual length of buffer */ | ||
176 | unsigned int rl_niovs; /* 0, 2 or 4 */ | ||
177 | unsigned int rl_nchunks; /* non-zero if chunks */ | ||
178 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | ||
179 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | ||
180 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ | ||
181 | struct ib_sge rl_send_iov[4]; /* for active requests */ | ||
182 | struct ib_sge rl_iov; /* for posting */ | ||
183 | struct ib_mr *rl_handle; /* handle for mem in rl_iov */ | ||
184 | char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ | ||
185 | __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ | ||
186 | }; | ||
187 | #define rpcr_to_rdmar(r) \ | ||
188 | container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) | ||
189 | |||
190 | /* | ||
191 | * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for | ||
192 | * inline requests/replies, and client/server credits. | ||
193 | * | ||
194 | * One of these is associated with a transport instance | ||
195 | */ | ||
196 | struct rpcrdma_buffer { | ||
197 | spinlock_t rb_lock; /* protects indexes */ | ||
198 | atomic_t rb_credits; /* most recent server credits */ | ||
199 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | ||
200 | int rb_max_requests;/* client max requests */ | ||
201 | struct list_head rb_mws; /* optional memory windows/fmrs */ | ||
202 | int rb_send_index; | ||
203 | struct rpcrdma_req **rb_send_bufs; | ||
204 | int rb_recv_index; | ||
205 | struct rpcrdma_rep **rb_recv_bufs; | ||
206 | char *rb_pool; | ||
207 | }; | ||
208 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | ||
209 | |||
210 | /* | ||
211 | * Internal structure for transport instance creation. This | ||
212 | * exists primarily for modularity. | ||
213 | * | ||
214 | * This data should be set with mount options | ||
215 | */ | ||
216 | struct rpcrdma_create_data_internal { | ||
217 | struct sockaddr_storage addr; /* RDMA server address */ | ||
218 | unsigned int max_requests; /* max requests (slots) in flight */ | ||
219 | unsigned int rsize; /* mount rsize - max read hdr+data */ | ||
220 | unsigned int wsize; /* mount wsize - max write hdr+data */ | ||
221 | unsigned int inline_rsize; /* max non-rdma read data payload */ | ||
222 | unsigned int inline_wsize; /* max non-rdma write data payload */ | ||
223 | unsigned int padding; /* non-rdma write header padding */ | ||
224 | }; | ||
225 | |||
226 | #define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ | ||
227 | (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) | ||
228 | |||
229 | #define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ | ||
230 | (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) | ||
231 | |||
232 | #define RPCRDMA_INLINE_PAD_VALUE(rq)\ | ||
233 | rpcx_to_rdmad(rq->rq_task->tk_xprt).padding | ||
234 | |||
235 | /* | ||
236 | * Statistics for RPCRDMA | ||
237 | */ | ||
238 | struct rpcrdma_stats { | ||
239 | unsigned long read_chunk_count; | ||
240 | unsigned long write_chunk_count; | ||
241 | unsigned long reply_chunk_count; | ||
242 | |||
243 | unsigned long long total_rdma_request; | ||
244 | unsigned long long total_rdma_reply; | ||
245 | |||
246 | unsigned long long pullup_copy_count; | ||
247 | unsigned long long fixup_copy_count; | ||
248 | unsigned long hardway_register_count; | ||
249 | unsigned long failed_marshal_count; | ||
250 | unsigned long bad_reply_count; | ||
251 | }; | ||
252 | |||
253 | /* | ||
254 | * RPCRDMA transport -- encapsulates the structures above for | ||
255 | * integration with RPC. | ||
256 | * | ||
257 | * The contained structures are embedded, not pointers, | ||
258 | * for convenience. This structure need not be visible externally. | ||
259 | * | ||
260 | * It is allocated and initialized during mount, and released | ||
261 | * during unmount. | ||
262 | */ | ||
263 | struct rpcrdma_xprt { | ||
264 | struct rpc_xprt xprt; | ||
265 | struct rpcrdma_ia rx_ia; | ||
266 | struct rpcrdma_ep rx_ep; | ||
267 | struct rpcrdma_buffer rx_buf; | ||
268 | struct rpcrdma_create_data_internal rx_data; | ||
269 | struct delayed_work rdma_connect; | ||
270 | struct rpcrdma_stats rx_stats; | ||
271 | }; | ||
272 | |||
273 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) | ||
274 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | ||
275 | |||
276 | /* | ||
277 | * Interface Adapter calls - xprtrdma/verbs.c | ||
278 | */ | ||
279 | int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); | ||
280 | void rpcrdma_ia_close(struct rpcrdma_ia *); | ||
281 | |||
282 | /* | ||
283 | * Endpoint calls - xprtrdma/verbs.c | ||
284 | */ | ||
285 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | ||
286 | struct rpcrdma_create_data_internal *); | ||
287 | int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); | ||
288 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); | ||
289 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); | ||
290 | |||
291 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, | ||
292 | struct rpcrdma_req *); | ||
293 | int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, | ||
294 | struct rpcrdma_rep *); | ||
295 | |||
296 | /* | ||
297 | * Buffer calls - xprtrdma/verbs.c | ||
298 | */ | ||
299 | int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, | ||
300 | struct rpcrdma_ia *, | ||
301 | struct rpcrdma_create_data_internal *); | ||
302 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); | ||
303 | |||
304 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); | ||
305 | void rpcrdma_buffer_put(struct rpcrdma_req *); | ||
306 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | ||
307 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | ||
308 | |||
309 | int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, | ||
310 | struct ib_mr **, struct ib_sge *); | ||
311 | int rpcrdma_deregister_internal(struct rpcrdma_ia *, | ||
312 | struct ib_mr *, struct ib_sge *); | ||
313 | |||
314 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
315 | int, int, struct rpcrdma_xprt *); | ||
316 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
317 | struct rpcrdma_xprt *, void *); | ||
318 | |||
319 | /* | ||
320 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | ||
321 | */ | ||
322 | void rpcrdma_conn_func(struct rpcrdma_ep *); | ||
323 | void rpcrdma_reply_handler(struct rpcrdma_rep *); | ||
324 | |||
325 | /* | ||
326 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | ||
327 | */ | ||
328 | int rpcrdma_marshal_req(struct rpc_rqst *); | ||
329 | |||
330 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ | ||
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 282efd447a61..02298f529dad 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -13,10 +13,14 @@ | |||
13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | 13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> |
14 | * | 14 | * |
15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> | 15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> |
16 | * | ||
17 | * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. | ||
18 | * <gilles.quillard@bull.net> | ||
16 | */ | 19 | */ |
17 | 20 | ||
18 | #include <linux/types.h> | 21 | #include <linux/types.h> |
19 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/module.h> | ||
20 | #include <linux/capability.h> | 24 | #include <linux/capability.h> |
21 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
22 | #include <linux/errno.h> | 26 | #include <linux/errno.h> |
@@ -28,6 +32,7 @@ | |||
28 | #include <linux/tcp.h> | 32 | #include <linux/tcp.h> |
29 | #include <linux/sunrpc/clnt.h> | 33 | #include <linux/sunrpc/clnt.h> |
30 | #include <linux/sunrpc/sched.h> | 34 | #include <linux/sunrpc/sched.h> |
35 | #include <linux/sunrpc/xprtsock.h> | ||
31 | #include <linux/file.h> | 36 | #include <linux/file.h> |
32 | 37 | ||
33 | #include <net/sock.h> | 38 | #include <net/sock.h> |
@@ -260,14 +265,29 @@ struct sock_xprt { | |||
260 | #define TCP_RCV_COPY_XID (1UL << 2) | 265 | #define TCP_RCV_COPY_XID (1UL << 2) |
261 | #define TCP_RCV_COPY_DATA (1UL << 3) | 266 | #define TCP_RCV_COPY_DATA (1UL << 3) |
262 | 267 | ||
263 | static void xs_format_peer_addresses(struct rpc_xprt *xprt) | 268 | static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) |
269 | { | ||
270 | return (struct sockaddr *) &xprt->addr; | ||
271 | } | ||
272 | |||
273 | static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) | ||
264 | { | 274 | { |
265 | struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; | 275 | return (struct sockaddr_in *) &xprt->addr; |
276 | } | ||
277 | |||
278 | static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) | ||
279 | { | ||
280 | return (struct sockaddr_in6 *) &xprt->addr; | ||
281 | } | ||
282 | |||
283 | static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) | ||
284 | { | ||
285 | struct sockaddr_in *addr = xs_addr_in(xprt); | ||
266 | char *buf; | 286 | char *buf; |
267 | 287 | ||
268 | buf = kzalloc(20, GFP_KERNEL); | 288 | buf = kzalloc(20, GFP_KERNEL); |
269 | if (buf) { | 289 | if (buf) { |
270 | snprintf(buf, 20, "%u.%u.%u.%u", | 290 | snprintf(buf, 20, NIPQUAD_FMT, |
271 | NIPQUAD(addr->sin_addr.s_addr)); | 291 | NIPQUAD(addr->sin_addr.s_addr)); |
272 | } | 292 | } |
273 | xprt->address_strings[RPC_DISPLAY_ADDR] = buf; | 293 | xprt->address_strings[RPC_DISPLAY_ADDR] = buf; |
@@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) | |||
279 | } | 299 | } |
280 | xprt->address_strings[RPC_DISPLAY_PORT] = buf; | 300 | xprt->address_strings[RPC_DISPLAY_PORT] = buf; |
281 | 301 | ||
282 | if (xprt->prot == IPPROTO_UDP) | 302 | buf = kzalloc(8, GFP_KERNEL); |
283 | xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; | 303 | if (buf) { |
284 | else | 304 | if (xprt->prot == IPPROTO_UDP) |
285 | xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; | 305 | snprintf(buf, 8, "udp"); |
306 | else | ||
307 | snprintf(buf, 8, "tcp"); | ||
308 | } | ||
309 | xprt->address_strings[RPC_DISPLAY_PROTO] = buf; | ||
286 | 310 | ||
287 | buf = kzalloc(48, GFP_KERNEL); | 311 | buf = kzalloc(48, GFP_KERNEL); |
288 | if (buf) { | 312 | if (buf) { |
289 | snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", | 313 | snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", |
290 | NIPQUAD(addr->sin_addr.s_addr), | 314 | NIPQUAD(addr->sin_addr.s_addr), |
291 | ntohs(addr->sin_port), | 315 | ntohs(addr->sin_port), |
292 | xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); | 316 | xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); |
293 | } | 317 | } |
294 | xprt->address_strings[RPC_DISPLAY_ALL] = buf; | 318 | xprt->address_strings[RPC_DISPLAY_ALL] = buf; |
319 | |||
320 | buf = kzalloc(10, GFP_KERNEL); | ||
321 | if (buf) { | ||
322 | snprintf(buf, 10, "%02x%02x%02x%02x", | ||
323 | NIPQUAD(addr->sin_addr.s_addr)); | ||
324 | } | ||
325 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; | ||
326 | |||
327 | buf = kzalloc(8, GFP_KERNEL); | ||
328 | if (buf) { | ||
329 | snprintf(buf, 8, "%4hx", | ||
330 | ntohs(addr->sin_port)); | ||
331 | } | ||
332 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; | ||
333 | |||
334 | buf = kzalloc(30, GFP_KERNEL); | ||
335 | if (buf) { | ||
336 | snprintf(buf, 30, NIPQUAD_FMT".%u.%u", | ||
337 | NIPQUAD(addr->sin_addr.s_addr), | ||
338 | ntohs(addr->sin_port) >> 8, | ||
339 | ntohs(addr->sin_port) & 0xff); | ||
340 | } | ||
341 | xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; | ||
342 | |||
343 | xprt->address_strings[RPC_DISPLAY_NETID] = | ||
344 | kstrdup(xprt->prot == IPPROTO_UDP ? | ||
345 | RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); | ||
346 | } | ||
347 | |||
348 | static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) | ||
349 | { | ||
350 | struct sockaddr_in6 *addr = xs_addr_in6(xprt); | ||
351 | char *buf; | ||
352 | |||
353 | buf = kzalloc(40, GFP_KERNEL); | ||
354 | if (buf) { | ||
355 | snprintf(buf, 40, NIP6_FMT, | ||
356 | NIP6(addr->sin6_addr)); | ||
357 | } | ||
358 | xprt->address_strings[RPC_DISPLAY_ADDR] = buf; | ||
359 | |||
360 | buf = kzalloc(8, GFP_KERNEL); | ||
361 | if (buf) { | ||
362 | snprintf(buf, 8, "%u", | ||
363 | ntohs(addr->sin6_port)); | ||
364 | } | ||
365 | xprt->address_strings[RPC_DISPLAY_PORT] = buf; | ||
366 | |||
367 | buf = kzalloc(8, GFP_KERNEL); | ||
368 | if (buf) { | ||
369 | if (xprt->prot == IPPROTO_UDP) | ||
370 | snprintf(buf, 8, "udp"); | ||
371 | else | ||
372 | snprintf(buf, 8, "tcp"); | ||
373 | } | ||
374 | xprt->address_strings[RPC_DISPLAY_PROTO] = buf; | ||
375 | |||
376 | buf = kzalloc(64, GFP_KERNEL); | ||
377 | if (buf) { | ||
378 | snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", | ||
379 | NIP6(addr->sin6_addr), | ||
380 | ntohs(addr->sin6_port), | ||
381 | xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); | ||
382 | } | ||
383 | xprt->address_strings[RPC_DISPLAY_ALL] = buf; | ||
384 | |||
385 | buf = kzalloc(36, GFP_KERNEL); | ||
386 | if (buf) { | ||
387 | snprintf(buf, 36, NIP6_SEQFMT, | ||
388 | NIP6(addr->sin6_addr)); | ||
389 | } | ||
390 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; | ||
391 | |||
392 | buf = kzalloc(8, GFP_KERNEL); | ||
393 | if (buf) { | ||
394 | snprintf(buf, 8, "%4hx", | ||
395 | ntohs(addr->sin6_port)); | ||
396 | } | ||
397 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; | ||
398 | |||
399 | buf = kzalloc(50, GFP_KERNEL); | ||
400 | if (buf) { | ||
401 | snprintf(buf, 50, NIP6_FMT".%u.%u", | ||
402 | NIP6(addr->sin6_addr), | ||
403 | ntohs(addr->sin6_port) >> 8, | ||
404 | ntohs(addr->sin6_port) & 0xff); | ||
405 | } | ||
406 | xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; | ||
407 | |||
408 | xprt->address_strings[RPC_DISPLAY_NETID] = | ||
409 | kstrdup(xprt->prot == IPPROTO_UDP ? | ||
410 | RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); | ||
295 | } | 411 | } |
296 | 412 | ||
297 | static void xs_free_peer_addresses(struct rpc_xprt *xprt) | 413 | static void xs_free_peer_addresses(struct rpc_xprt *xprt) |
298 | { | 414 | { |
299 | kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); | 415 | int i; |
300 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); | 416 | |
301 | kfree(xprt->address_strings[RPC_DISPLAY_ALL]); | 417 | for (i = 0; i < RPC_DISPLAY_MAX; i++) |
418 | kfree(xprt->address_strings[i]); | ||
302 | } | 419 | } |
303 | 420 | ||
304 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | 421 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) |
@@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
463 | 580 | ||
464 | req->rq_xtime = jiffies; | 581 | req->rq_xtime = jiffies; |
465 | status = xs_sendpages(transport->sock, | 582 | status = xs_sendpages(transport->sock, |
466 | (struct sockaddr *) &xprt->addr, | 583 | xs_addr(xprt), |
467 | xprt->addrlen, xdr, | 584 | xprt->addrlen, xdr, |
468 | req->rq_bytes_sent); | 585 | req->rq_bytes_sent); |
469 | 586 | ||
470 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | 587 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", |
471 | xdr->len - req->rq_bytes_sent, status); | 588 | xdr->len - req->rq_bytes_sent, status); |
472 | 589 | ||
473 | if (likely(status >= (int) req->rq_slen)) | 590 | if (status >= 0) { |
474 | return 0; | 591 | task->tk_bytes_sent += status; |
475 | 592 | if (status >= req->rq_slen) | |
476 | /* Still some bytes left; set up for a retry later. */ | 593 | return 0; |
477 | if (status > 0) | 594 | /* Still some bytes left; set up for a retry later. */ |
478 | status = -EAGAIN; | 595 | status = -EAGAIN; |
596 | } | ||
479 | 597 | ||
480 | switch (status) { | 598 | switch (status) { |
481 | case -ENETUNREACH: | 599 | case -ENETUNREACH: |
@@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task) | |||
523 | struct rpc_xprt *xprt = req->rq_xprt; | 641 | struct rpc_xprt *xprt = req->rq_xprt; |
524 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | 642 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
525 | struct xdr_buf *xdr = &req->rq_snd_buf; | 643 | struct xdr_buf *xdr = &req->rq_snd_buf; |
526 | int status, retry = 0; | 644 | int status; |
645 | unsigned int retry = 0; | ||
527 | 646 | ||
528 | xs_encode_tcp_record_marker(&req->rq_snd_buf); | 647 | xs_encode_tcp_record_marker(&req->rq_snd_buf); |
529 | 648 | ||
@@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt) | |||
661 | xs_free_peer_addresses(xprt); | 780 | xs_free_peer_addresses(xprt); |
662 | kfree(xprt->slot); | 781 | kfree(xprt->slot); |
663 | kfree(xprt); | 782 | kfree(xprt); |
783 | module_put(THIS_MODULE); | ||
664 | } | 784 | } |
665 | 785 | ||
666 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) | 786 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) |
@@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void) | |||
1139 | */ | 1259 | */ |
1140 | static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) | 1260 | static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) |
1141 | { | 1261 | { |
1142 | struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; | 1262 | struct sockaddr *addr = xs_addr(xprt); |
1143 | 1263 | ||
1144 | dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); | 1264 | dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); |
1145 | 1265 | ||
1146 | sap->sin_port = htons(port); | 1266 | switch (addr->sa_family) { |
1267 | case AF_INET: | ||
1268 | ((struct sockaddr_in *)addr)->sin_port = htons(port); | ||
1269 | break; | ||
1270 | case AF_INET6: | ||
1271 | ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); | ||
1272 | break; | ||
1273 | default: | ||
1274 | BUG(); | ||
1275 | } | ||
1147 | } | 1276 | } |
1148 | 1277 | ||
1149 | static int xs_bind(struct sock_xprt *transport, struct socket *sock) | 1278 | static int xs_bind4(struct sock_xprt *transport, struct socket *sock) |
1150 | { | 1279 | { |
1151 | struct sockaddr_in myaddr = { | 1280 | struct sockaddr_in myaddr = { |
1152 | .sin_family = AF_INET, | 1281 | .sin_family = AF_INET, |
@@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) | |||
1174 | else | 1303 | else |
1175 | port--; | 1304 | port--; |
1176 | } while (err == -EADDRINUSE && port != transport->port); | 1305 | } while (err == -EADDRINUSE && port != transport->port); |
1177 | dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", | 1306 | dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", |
1178 | NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); | 1307 | __FUNCTION__, NIPQUAD(myaddr.sin_addr), |
1308 | port, err ? "failed" : "ok", err); | ||
1309 | return err; | ||
1310 | } | ||
1311 | |||
1312 | static int xs_bind6(struct sock_xprt *transport, struct socket *sock) | ||
1313 | { | ||
1314 | struct sockaddr_in6 myaddr = { | ||
1315 | .sin6_family = AF_INET6, | ||
1316 | }; | ||
1317 | struct sockaddr_in6 *sa; | ||
1318 | int err; | ||
1319 | unsigned short port = transport->port; | ||
1320 | |||
1321 | if (!transport->xprt.resvport) | ||
1322 | port = 0; | ||
1323 | sa = (struct sockaddr_in6 *)&transport->addr; | ||
1324 | myaddr.sin6_addr = sa->sin6_addr; | ||
1325 | do { | ||
1326 | myaddr.sin6_port = htons(port); | ||
1327 | err = kernel_bind(sock, (struct sockaddr *) &myaddr, | ||
1328 | sizeof(myaddr)); | ||
1329 | if (!transport->xprt.resvport) | ||
1330 | break; | ||
1331 | if (err == 0) { | ||
1332 | transport->port = port; | ||
1333 | break; | ||
1334 | } | ||
1335 | if (port <= xprt_min_resvport) | ||
1336 | port = xprt_max_resvport; | ||
1337 | else | ||
1338 | port--; | ||
1339 | } while (err == -EADDRINUSE && port != transport->port); | ||
1340 | dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", | ||
1341 | NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); | ||
1179 | return err; | 1342 | return err; |
1180 | } | 1343 | } |
1181 | 1344 | ||
@@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) | |||
1183 | static struct lock_class_key xs_key[2]; | 1346 | static struct lock_class_key xs_key[2]; |
1184 | static struct lock_class_key xs_slock_key[2]; | 1347 | static struct lock_class_key xs_slock_key[2]; |
1185 | 1348 | ||
1186 | static inline void xs_reclassify_socket(struct socket *sock) | 1349 | static inline void xs_reclassify_socket4(struct socket *sock) |
1187 | { | 1350 | { |
1188 | struct sock *sk = sock->sk; | 1351 | struct sock *sk = sock->sk; |
1352 | |||
1189 | BUG_ON(sock_owned_by_user(sk)); | 1353 | BUG_ON(sock_owned_by_user(sk)); |
1190 | switch (sk->sk_family) { | 1354 | sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", |
1191 | case AF_INET: | 1355 | &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); |
1192 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", | 1356 | } |
1193 | &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); | ||
1194 | break; | ||
1195 | 1357 | ||
1196 | case AF_INET6: | 1358 | static inline void xs_reclassify_socket6(struct socket *sock) |
1197 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", | 1359 | { |
1198 | &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); | 1360 | struct sock *sk = sock->sk; |
1199 | break; | ||
1200 | 1361 | ||
1201 | default: | 1362 | BUG_ON(sock_owned_by_user(sk)); |
1202 | BUG(); | 1363 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", |
1203 | } | 1364 | &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); |
1204 | } | 1365 | } |
1205 | #else | 1366 | #else |
1206 | static inline void xs_reclassify_socket(struct socket *sock) | 1367 | static inline void xs_reclassify_socket4(struct socket *sock) |
1368 | { | ||
1369 | } | ||
1370 | |||
1371 | static inline void xs_reclassify_socket6(struct socket *sock) | ||
1207 | { | 1372 | { |
1208 | } | 1373 | } |
1209 | #endif | 1374 | #endif |
1210 | 1375 | ||
1376 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | ||
1377 | { | ||
1378 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | ||
1379 | |||
1380 | if (!transport->inet) { | ||
1381 | struct sock *sk = sock->sk; | ||
1382 | |||
1383 | write_lock_bh(&sk->sk_callback_lock); | ||
1384 | |||
1385 | sk->sk_user_data = xprt; | ||
1386 | transport->old_data_ready = sk->sk_data_ready; | ||
1387 | transport->old_state_change = sk->sk_state_change; | ||
1388 | transport->old_write_space = sk->sk_write_space; | ||
1389 | sk->sk_data_ready = xs_udp_data_ready; | ||
1390 | sk->sk_write_space = xs_udp_write_space; | ||
1391 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
1392 | sk->sk_allocation = GFP_ATOMIC; | ||
1393 | |||
1394 | xprt_set_connected(xprt); | ||
1395 | |||
1396 | /* Reset to new socket */ | ||
1397 | transport->sock = sock; | ||
1398 | transport->inet = sk; | ||
1399 | |||
1400 | write_unlock_bh(&sk->sk_callback_lock); | ||
1401 | } | ||
1402 | xs_udp_do_set_buffer_size(xprt); | ||
1403 | } | ||
1404 | |||
1211 | /** | 1405 | /** |
1212 | * xs_udp_connect_worker - set up a UDP socket | 1406 | * xs_udp_connect_worker4 - set up a UDP socket |
1213 | * @work: RPC transport to connect | 1407 | * @work: RPC transport to connect |
1214 | * | 1408 | * |
1215 | * Invoked by a work queue tasklet. | 1409 | * Invoked by a work queue tasklet. |
1216 | */ | 1410 | */ |
1217 | static void xs_udp_connect_worker(struct work_struct *work) | 1411 | static void xs_udp_connect_worker4(struct work_struct *work) |
1218 | { | 1412 | { |
1219 | struct sock_xprt *transport = | 1413 | struct sock_xprt *transport = |
1220 | container_of(work, struct sock_xprt, connect_worker.work); | 1414 | container_of(work, struct sock_xprt, connect_worker.work); |
@@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work) | |||
1232 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); | 1426 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); |
1233 | goto out; | 1427 | goto out; |
1234 | } | 1428 | } |
1235 | xs_reclassify_socket(sock); | 1429 | xs_reclassify_socket4(sock); |
1236 | 1430 | ||
1237 | if (xs_bind(transport, sock)) { | 1431 | if (xs_bind4(transport, sock)) { |
1238 | sock_release(sock); | 1432 | sock_release(sock); |
1239 | goto out; | 1433 | goto out; |
1240 | } | 1434 | } |
@@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work) | |||
1242 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1436 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
1243 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1437 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
1244 | 1438 | ||
1245 | if (!transport->inet) { | 1439 | xs_udp_finish_connecting(xprt, sock); |
1246 | struct sock *sk = sock->sk; | 1440 | status = 0; |
1441 | out: | ||
1442 | xprt_wake_pending_tasks(xprt, status); | ||
1443 | xprt_clear_connecting(xprt); | ||
1444 | } | ||
1247 | 1445 | ||
1248 | write_lock_bh(&sk->sk_callback_lock); | 1446 | /** |
1447 | * xs_udp_connect_worker6 - set up a UDP socket | ||
1448 | * @work: RPC transport to connect | ||
1449 | * | ||
1450 | * Invoked by a work queue tasklet. | ||
1451 | */ | ||
1452 | static void xs_udp_connect_worker6(struct work_struct *work) | ||
1453 | { | ||
1454 | struct sock_xprt *transport = | ||
1455 | container_of(work, struct sock_xprt, connect_worker.work); | ||
1456 | struct rpc_xprt *xprt = &transport->xprt; | ||
1457 | struct socket *sock = transport->sock; | ||
1458 | int err, status = -EIO; | ||
1249 | 1459 | ||
1250 | sk->sk_user_data = xprt; | 1460 | if (xprt->shutdown || !xprt_bound(xprt)) |
1251 | transport->old_data_ready = sk->sk_data_ready; | 1461 | goto out; |
1252 | transport->old_state_change = sk->sk_state_change; | ||
1253 | transport->old_write_space = sk->sk_write_space; | ||
1254 | sk->sk_data_ready = xs_udp_data_ready; | ||
1255 | sk->sk_write_space = xs_udp_write_space; | ||
1256 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
1257 | sk->sk_allocation = GFP_ATOMIC; | ||
1258 | 1462 | ||
1259 | xprt_set_connected(xprt); | 1463 | /* Start by resetting any existing state */ |
1464 | xs_close(xprt); | ||
1260 | 1465 | ||
1261 | /* Reset to new socket */ | 1466 | if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { |
1262 | transport->sock = sock; | 1467 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); |
1263 | transport->inet = sk; | 1468 | goto out; |
1469 | } | ||
1470 | xs_reclassify_socket6(sock); | ||
1264 | 1471 | ||
1265 | write_unlock_bh(&sk->sk_callback_lock); | 1472 | if (xs_bind6(transport, sock) < 0) { |
1473 | sock_release(sock); | ||
1474 | goto out; | ||
1266 | } | 1475 | } |
1267 | xs_udp_do_set_buffer_size(xprt); | 1476 | |
1477 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | ||
1478 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | ||
1479 | |||
1480 | xs_udp_finish_connecting(xprt, sock); | ||
1268 | status = 0; | 1481 | status = 0; |
1269 | out: | 1482 | out: |
1270 | xprt_wake_pending_tasks(xprt, status); | 1483 | xprt_wake_pending_tasks(xprt, status); |
@@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | |||
1295 | result); | 1508 | result); |
1296 | } | 1509 | } |
1297 | 1510 | ||
1511 | static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | ||
1512 | { | ||
1513 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | ||
1514 | |||
1515 | if (!transport->inet) { | ||
1516 | struct sock *sk = sock->sk; | ||
1517 | |||
1518 | write_lock_bh(&sk->sk_callback_lock); | ||
1519 | |||
1520 | sk->sk_user_data = xprt; | ||
1521 | transport->old_data_ready = sk->sk_data_ready; | ||
1522 | transport->old_state_change = sk->sk_state_change; | ||
1523 | transport->old_write_space = sk->sk_write_space; | ||
1524 | sk->sk_data_ready = xs_tcp_data_ready; | ||
1525 | sk->sk_state_change = xs_tcp_state_change; | ||
1526 | sk->sk_write_space = xs_tcp_write_space; | ||
1527 | sk->sk_allocation = GFP_ATOMIC; | ||
1528 | |||
1529 | /* socket options */ | ||
1530 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
1531 | sock_reset_flag(sk, SOCK_LINGER); | ||
1532 | tcp_sk(sk)->linger2 = 0; | ||
1533 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
1534 | |||
1535 | xprt_clear_connected(xprt); | ||
1536 | |||
1537 | /* Reset to new socket */ | ||
1538 | transport->sock = sock; | ||
1539 | transport->inet = sk; | ||
1540 | |||
1541 | write_unlock_bh(&sk->sk_callback_lock); | ||
1542 | } | ||
1543 | |||
1544 | /* Tell the socket layer to start connecting... */ | ||
1545 | xprt->stat.connect_count++; | ||
1546 | xprt->stat.connect_start = jiffies; | ||
1547 | return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); | ||
1548 | } | ||
1549 | |||
1298 | /** | 1550 | /** |
1299 | * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint | 1551 | * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint |
1300 | * @work: RPC transport to connect | 1552 | * @work: RPC transport to connect |
1301 | * | 1553 | * |
1302 | * Invoked by a work queue tasklet. | 1554 | * Invoked by a work queue tasklet. |
1303 | */ | 1555 | */ |
1304 | static void xs_tcp_connect_worker(struct work_struct *work) | 1556 | static void xs_tcp_connect_worker4(struct work_struct *work) |
1305 | { | 1557 | { |
1306 | struct sock_xprt *transport = | 1558 | struct sock_xprt *transport = |
1307 | container_of(work, struct sock_xprt, connect_worker.work); | 1559 | container_of(work, struct sock_xprt, connect_worker.work); |
@@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work) | |||
1315 | if (!sock) { | 1567 | if (!sock) { |
1316 | /* start from scratch */ | 1568 | /* start from scratch */ |
1317 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1569 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
1318 | dprintk("RPC: can't create TCP transport " | 1570 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); |
1319 | "socket (%d).\n", -err); | ||
1320 | goto out; | 1571 | goto out; |
1321 | } | 1572 | } |
1322 | xs_reclassify_socket(sock); | 1573 | xs_reclassify_socket4(sock); |
1323 | 1574 | ||
1324 | if (xs_bind(transport, sock)) { | 1575 | if (xs_bind4(transport, sock) < 0) { |
1325 | sock_release(sock); | 1576 | sock_release(sock); |
1326 | goto out; | 1577 | goto out; |
1327 | } | 1578 | } |
@@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work) | |||
1332 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1583 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
1333 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1584 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
1334 | 1585 | ||
1335 | if (!transport->inet) { | 1586 | status = xs_tcp_finish_connecting(xprt, sock); |
1336 | struct sock *sk = sock->sk; | 1587 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", |
1337 | 1588 | xprt, -status, xprt_connected(xprt), | |
1338 | write_lock_bh(&sk->sk_callback_lock); | 1589 | sock->sk->sk_state); |
1590 | if (status < 0) { | ||
1591 | switch (status) { | ||
1592 | case -EINPROGRESS: | ||
1593 | case -EALREADY: | ||
1594 | goto out_clear; | ||
1595 | case -ECONNREFUSED: | ||
1596 | case -ECONNRESET: | ||
1597 | /* retry with existing socket, after a delay */ | ||
1598 | break; | ||
1599 | default: | ||
1600 | /* get rid of existing socket, and retry */ | ||
1601 | xs_close(xprt); | ||
1602 | break; | ||
1603 | } | ||
1604 | } | ||
1605 | out: | ||
1606 | xprt_wake_pending_tasks(xprt, status); | ||
1607 | out_clear: | ||
1608 | xprt_clear_connecting(xprt); | ||
1609 | } | ||
1339 | 1610 | ||
1340 | sk->sk_user_data = xprt; | 1611 | /** |
1341 | transport->old_data_ready = sk->sk_data_ready; | 1612 | * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint |
1342 | transport->old_state_change = sk->sk_state_change; | 1613 | * @work: RPC transport to connect |
1343 | transport->old_write_space = sk->sk_write_space; | 1614 | * |
1344 | sk->sk_data_ready = xs_tcp_data_ready; | 1615 | * Invoked by a work queue tasklet. |
1345 | sk->sk_state_change = xs_tcp_state_change; | 1616 | */ |
1346 | sk->sk_write_space = xs_tcp_write_space; | 1617 | static void xs_tcp_connect_worker6(struct work_struct *work) |
1347 | sk->sk_allocation = GFP_ATOMIC; | 1618 | { |
1619 | struct sock_xprt *transport = | ||
1620 | container_of(work, struct sock_xprt, connect_worker.work); | ||
1621 | struct rpc_xprt *xprt = &transport->xprt; | ||
1622 | struct socket *sock = transport->sock; | ||
1623 | int err, status = -EIO; | ||
1348 | 1624 | ||
1349 | /* socket options */ | 1625 | if (xprt->shutdown || !xprt_bound(xprt)) |
1350 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | 1626 | goto out; |
1351 | sock_reset_flag(sk, SOCK_LINGER); | ||
1352 | tcp_sk(sk)->linger2 = 0; | ||
1353 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
1354 | 1627 | ||
1355 | xprt_clear_connected(xprt); | 1628 | if (!sock) { |
1629 | /* start from scratch */ | ||
1630 | if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | ||
1631 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | ||
1632 | goto out; | ||
1633 | } | ||
1634 | xs_reclassify_socket6(sock); | ||
1356 | 1635 | ||
1357 | /* Reset to new socket */ | 1636 | if (xs_bind6(transport, sock) < 0) { |
1358 | transport->sock = sock; | 1637 | sock_release(sock); |
1359 | transport->inet = sk; | 1638 | goto out; |
1639 | } | ||
1640 | } else | ||
1641 | /* "close" the socket, preserving the local port */ | ||
1642 | xs_tcp_reuse_connection(xprt); | ||
1360 | 1643 | ||
1361 | write_unlock_bh(&sk->sk_callback_lock); | 1644 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
1362 | } | 1645 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
1363 | 1646 | ||
1364 | /* Tell the socket layer to start connecting... */ | 1647 | status = xs_tcp_finish_connecting(xprt, sock); |
1365 | xprt->stat.connect_count++; | ||
1366 | xprt->stat.connect_start = jiffies; | ||
1367 | status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, | ||
1368 | xprt->addrlen, O_NONBLOCK); | ||
1369 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | 1648 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", |
1370 | xprt, -status, xprt_connected(xprt), | 1649 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); |
1371 | sock->sk->sk_state); | ||
1372 | if (status < 0) { | 1650 | if (status < 0) { |
1373 | switch (status) { | 1651 | switch (status) { |
1374 | case -EINPROGRESS: | 1652 | case -EINPROGRESS: |
@@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { | |||
1508 | .print_stats = xs_tcp_print_stats, | 1786 | .print_stats = xs_tcp_print_stats, |
1509 | }; | 1787 | }; |
1510 | 1788 | ||
1511 | static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) | 1789 | static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, |
1790 | unsigned int slot_table_size) | ||
1512 | { | 1791 | { |
1513 | struct rpc_xprt *xprt; | 1792 | struct rpc_xprt *xprt; |
1514 | struct sock_xprt *new; | 1793 | struct sock_xprt *new; |
@@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned | |||
1549 | * @args: rpc transport creation arguments | 1828 | * @args: rpc transport creation arguments |
1550 | * | 1829 | * |
1551 | */ | 1830 | */ |
1552 | struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) | 1831 | struct rpc_xprt *xs_setup_udp(struct xprt_create *args) |
1553 | { | 1832 | { |
1833 | struct sockaddr *addr = args->dstaddr; | ||
1554 | struct rpc_xprt *xprt; | 1834 | struct rpc_xprt *xprt; |
1555 | struct sock_xprt *transport; | 1835 | struct sock_xprt *transport; |
1556 | 1836 | ||
@@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) | |||
1559 | return xprt; | 1839 | return xprt; |
1560 | transport = container_of(xprt, struct sock_xprt, xprt); | 1840 | transport = container_of(xprt, struct sock_xprt, xprt); |
1561 | 1841 | ||
1562 | if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) | ||
1563 | xprt_set_bound(xprt); | ||
1564 | |||
1565 | xprt->prot = IPPROTO_UDP; | 1842 | xprt->prot = IPPROTO_UDP; |
1566 | xprt->tsh_size = 0; | 1843 | xprt->tsh_size = 0; |
1567 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | 1844 | /* XXX: header size can vary due to auth type, IPv6, etc. */ |
1568 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 1845 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); |
1569 | 1846 | ||
1570 | INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); | ||
1571 | xprt->bind_timeout = XS_BIND_TO; | 1847 | xprt->bind_timeout = XS_BIND_TO; |
1572 | xprt->connect_timeout = XS_UDP_CONN_TO; | 1848 | xprt->connect_timeout = XS_UDP_CONN_TO; |
1573 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | 1849 | xprt->reestablish_timeout = XS_UDP_REEST_TO; |
@@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) | |||
1580 | else | 1856 | else |
1581 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | 1857 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); |
1582 | 1858 | ||
1583 | xs_format_peer_addresses(xprt); | 1859 | switch (addr->sa_family) { |
1860 | case AF_INET: | ||
1861 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) | ||
1862 | xprt_set_bound(xprt); | ||
1863 | |||
1864 | INIT_DELAYED_WORK(&transport->connect_worker, | ||
1865 | xs_udp_connect_worker4); | ||
1866 | xs_format_ipv4_peer_addresses(xprt); | ||
1867 | break; | ||
1868 | case AF_INET6: | ||
1869 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) | ||
1870 | xprt_set_bound(xprt); | ||
1871 | |||
1872 | INIT_DELAYED_WORK(&transport->connect_worker, | ||
1873 | xs_udp_connect_worker6); | ||
1874 | xs_format_ipv6_peer_addresses(xprt); | ||
1875 | break; | ||
1876 | default: | ||
1877 | kfree(xprt); | ||
1878 | return ERR_PTR(-EAFNOSUPPORT); | ||
1879 | } | ||
1880 | |||
1584 | dprintk("RPC: set up transport to address %s\n", | 1881 | dprintk("RPC: set up transport to address %s\n", |
1585 | xprt->address_strings[RPC_DISPLAY_ALL]); | 1882 | xprt->address_strings[RPC_DISPLAY_ALL]); |
1586 | 1883 | ||
1587 | return xprt; | 1884 | if (try_module_get(THIS_MODULE)) |
1885 | return xprt; | ||
1886 | |||
1887 | kfree(xprt->slot); | ||
1888 | kfree(xprt); | ||
1889 | return ERR_PTR(-EINVAL); | ||
1588 | } | 1890 | } |
1589 | 1891 | ||
1590 | /** | 1892 | /** |
@@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) | |||
1592 | * @args: rpc transport creation arguments | 1894 | * @args: rpc transport creation arguments |
1593 | * | 1895 | * |
1594 | */ | 1896 | */ |
1595 | struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) | 1897 | struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) |
1596 | { | 1898 | { |
1899 | struct sockaddr *addr = args->dstaddr; | ||
1597 | struct rpc_xprt *xprt; | 1900 | struct rpc_xprt *xprt; |
1598 | struct sock_xprt *transport; | 1901 | struct sock_xprt *transport; |
1599 | 1902 | ||
@@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) | |||
1602 | return xprt; | 1905 | return xprt; |
1603 | transport = container_of(xprt, struct sock_xprt, xprt); | 1906 | transport = container_of(xprt, struct sock_xprt, xprt); |
1604 | 1907 | ||
1605 | if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) | ||
1606 | xprt_set_bound(xprt); | ||
1607 | |||
1608 | xprt->prot = IPPROTO_TCP; | 1908 | xprt->prot = IPPROTO_TCP; |
1609 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | 1909 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); |
1610 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | 1910 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
1611 | 1911 | ||
1612 | INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); | ||
1613 | xprt->bind_timeout = XS_BIND_TO; | 1912 | xprt->bind_timeout = XS_BIND_TO; |
1614 | xprt->connect_timeout = XS_TCP_CONN_TO; | 1913 | xprt->connect_timeout = XS_TCP_CONN_TO; |
1615 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | 1914 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
@@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) | |||
1622 | else | 1921 | else |
1623 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | 1922 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); |
1624 | 1923 | ||
1625 | xs_format_peer_addresses(xprt); | 1924 | switch (addr->sa_family) { |
1925 | case AF_INET: | ||
1926 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) | ||
1927 | xprt_set_bound(xprt); | ||
1928 | |||
1929 | INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); | ||
1930 | xs_format_ipv4_peer_addresses(xprt); | ||
1931 | break; | ||
1932 | case AF_INET6: | ||
1933 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) | ||
1934 | xprt_set_bound(xprt); | ||
1935 | |||
1936 | INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); | ||
1937 | xs_format_ipv6_peer_addresses(xprt); | ||
1938 | break; | ||
1939 | default: | ||
1940 | kfree(xprt); | ||
1941 | return ERR_PTR(-EAFNOSUPPORT); | ||
1942 | } | ||
1943 | |||
1626 | dprintk("RPC: set up transport to address %s\n", | 1944 | dprintk("RPC: set up transport to address %s\n", |
1627 | xprt->address_strings[RPC_DISPLAY_ALL]); | 1945 | xprt->address_strings[RPC_DISPLAY_ALL]); |
1628 | 1946 | ||
1629 | return xprt; | 1947 | if (try_module_get(THIS_MODULE)) |
1948 | return xprt; | ||
1949 | |||
1950 | kfree(xprt->slot); | ||
1951 | kfree(xprt); | ||
1952 | return ERR_PTR(-EINVAL); | ||
1630 | } | 1953 | } |
1631 | 1954 | ||
1955 | static struct xprt_class xs_udp_transport = { | ||
1956 | .list = LIST_HEAD_INIT(xs_udp_transport.list), | ||
1957 | .name = "udp", | ||
1958 | .owner = THIS_MODULE, | ||
1959 | .ident = IPPROTO_UDP, | ||
1960 | .setup = xs_setup_udp, | ||
1961 | }; | ||
1962 | |||
1963 | static struct xprt_class xs_tcp_transport = { | ||
1964 | .list = LIST_HEAD_INIT(xs_tcp_transport.list), | ||
1965 | .name = "tcp", | ||
1966 | .owner = THIS_MODULE, | ||
1967 | .ident = IPPROTO_TCP, | ||
1968 | .setup = xs_setup_tcp, | ||
1969 | }; | ||
1970 | |||
1632 | /** | 1971 | /** |
1633 | * init_socket_xprt - set up xprtsock's sysctls | 1972 | * init_socket_xprt - set up xprtsock's sysctls, register with RPC client |
1634 | * | 1973 | * |
1635 | */ | 1974 | */ |
1636 | int init_socket_xprt(void) | 1975 | int init_socket_xprt(void) |
@@ -1640,11 +1979,14 @@ int init_socket_xprt(void) | |||
1640 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | 1979 | sunrpc_table_header = register_sysctl_table(sunrpc_table); |
1641 | #endif | 1980 | #endif |
1642 | 1981 | ||
1982 | xprt_register_transport(&xs_udp_transport); | ||
1983 | xprt_register_transport(&xs_tcp_transport); | ||
1984 | |||
1643 | return 0; | 1985 | return 0; |
1644 | } | 1986 | } |
1645 | 1987 | ||
1646 | /** | 1988 | /** |
1647 | * cleanup_socket_xprt - remove xprtsock's sysctls | 1989 | * cleanup_socket_xprt - remove xprtsock's sysctls, unregister |
1648 | * | 1990 | * |
1649 | */ | 1991 | */ |
1650 | void cleanup_socket_xprt(void) | 1992 | void cleanup_socket_xprt(void) |
@@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void) | |||
1655 | sunrpc_table_header = NULL; | 1997 | sunrpc_table_header = NULL; |
1656 | } | 1998 | } |
1657 | #endif | 1999 | #endif |
2000 | |||
2001 | xprt_unregister_transport(&xs_udp_transport); | ||
2002 | xprt_unregister_transport(&xs_tcp_transport); | ||
1658 | } | 2003 | } |