aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/knfsd-stats.txt159
-rw-r--r--Documentation/filesystems/nfs41-server.txt161
-rw-r--r--fs/lockd/svclock.c13
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfs3proc.c10
-rw-r--r--fs/nfsd/nfs4callback.c47
-rw-r--r--fs/nfsd/nfs4proc.c246
-rw-r--r--fs/nfsd/nfs4recover.c74
-rw-r--r--fs/nfsd/nfs4state.c1196
-rw-r--r--fs/nfsd/nfs4xdr.c633
-rw-r--r--fs/nfsd/nfsctl.c38
-rw-r--r--fs/nfsd/nfsproc.c3
-rw-r--r--fs/nfsd/nfssvc.c88
-rw-r--r--fs/nfsd/vfs.c37
-rw-r--r--include/linux/lockd/xdr.h12
-rw-r--r--include/linux/lockd/xdr4.h10
-rw-r--r--include/linux/nfs.h1
-rw-r--r--include/linux/nfs4.h138
-rw-r--r--include/linux/nfsd/cache.h8
-rw-r--r--include/linux/nfsd/nfsd.h225
-rw-r--r--include/linux/nfsd/nfsfh.h7
-rw-r--r--include/linux/nfsd/state.h84
-rw-r--r--include/linux/nfsd/stats.h9
-rw-r--r--include/linux/nfsd/xdr4.h129
-rw-r--r--include/linux/sunrpc/svc.h16
-rw-r--r--include/linux/sunrpc/xdr.h42
-rw-r--r--net/sunrpc/svc.c3
-rw-r--r--net/sunrpc/svc_xprt.c127
-rw-r--r--net/sunrpc/svcsock.c35
29 files changed, 2997 insertions, 555 deletions
diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/knfsd-stats.txt
new file mode 100644
index 000000000000..64ced5149d37
--- /dev/null
+++ b/Documentation/filesystems/knfsd-stats.txt
@@ -0,0 +1,159 @@
1
2Kernel NFS Server Statistics
3============================
4
5This document describes the format and semantics of the statistics
6which the kernel NFS server makes available to userspace. These
7statistics are available in several text form pseudo files, each of
8which is described separately below.
9
10In most cases you don't need to know these formats, as the nfsstat(8)
11program from the nfs-utils distribution provides a helpful command-line
12interface for extracting and printing them.
13
14All the files described here are formatted as a sequence of text lines,
15separated by newline '\n' characters. Lines beginning with a hash
16'#' character are comments intended for humans and should be ignored
17by parsing routines. All other lines contain a sequence of fields
18separated by whitespace.
19
20/proc/fs/nfsd/pool_stats
21------------------------
22
23This file is available in kernels from 2.6.30 onwards, if the
24/proc/fs/nfsd filesystem is mounted (it almost always should be).
25
26The first line is a comment which describes the fields present in
27all the other lines. The other lines present the following data as
28a sequence of unsigned decimal numeric fields. One line is shown
29for each NFS thread pool.
30
31All counters are 64 bits wide and wrap naturally. There is no way
32to zero these counters; instead, applications should do their own
33rate conversion.
34
35pool
36 The id number of the NFS thread pool to which this line applies.
37 This number does not change.
38
39 Thread pool ids are a contiguous set of small integers starting
40 at zero. The maximum value depends on the thread pool mode, but
41 currently cannot be larger than the number of CPUs in the system.
42 Note that in the default case there will be a single thread pool
43 which contains all the nfsd threads and all the CPUs in the system,
44 and thus this file will have a single line with a pool id of "0".
45
46packets-arrived
47 Counts how many NFS packets have arrived. More precisely, this
48 is the number of times that the network stack has notified the
49 sunrpc server layer that new data may be available on a transport
50 (e.g. an NFS TCP or UDP socket or an NFS/RDMA endpoint).
51
52 Depending on the NFS workload patterns and various network stack
53 effects (such as Large Receive Offload) which can combine packets
54 on the wire, this may be either more or less than the number
55 of NFS calls received (which statistic is available elsewhere).
56 However this is a more accurate and less workload-dependent measure
57 of how much CPU load is being placed on the sunrpc server layer
58 due to NFS network traffic.
59
60sockets-enqueued
61 Counts how many times an NFS transport is enqueued to wait for
62 an nfsd thread to service it, i.e. no nfsd thread was considered
63 available.
64
65 The circumstance this statistic tracks indicates that there was NFS
66 network-facing work to be done but it couldn't be done immediately,
67 thus introducing a small delay in servicing NFS calls. The ideal
68 rate of change for this counter is zero; significantly non-zero
69 values may indicate a performance limitation.
70
71 This can happen either because there are too few nfsd threads in the
72 thread pool for the NFS workload (the workload is thread-limited),
73 or because the NFS workload needs more CPU time than is available in
74 the thread pool (the workload is CPU-limited). In the former case,
75 configuring more nfsd threads will probably improve the performance
76 of the NFS workload. In the latter case, the sunrpc server layer is
77 already choosing not to wake idle nfsd threads because there are too
78 many nfsd threads which want to run but cannot, so configuring more
79 nfsd threads will make no difference whatsoever. The overloads-avoided
80 statistic (see below) can be used to distinguish these cases.
81
82threads-woken
83 Counts how many times an idle nfsd thread is woken to try to
84 receive some data from an NFS transport.
85
86 This statistic tracks the circumstance where incoming
87 network-facing NFS work is being handled quickly, which is a good
88 thing. The ideal rate of change for this counter will be close
89 to but less than the rate of change of the packets-arrived counter.
90
91overloads-avoided
92 Counts how many times the sunrpc server layer chose not to wake an
93 nfsd thread, despite the presence of idle nfsd threads, because
94 too many nfsd threads had been recently woken but could not get
95 enough CPU time to actually run.
96
97 This statistic counts a circumstance where the sunrpc layer
98 heuristically avoids overloading the CPU scheduler with too many
99 runnable nfsd threads. The ideal rate of change for this counter
100 is zero. Significant non-zero values indicate that the workload
101 is CPU limited. Usually this is associated with heavy CPU usage
102 on all the CPUs in the nfsd thread pool.
103
104 If a sustained large overloads-avoided rate is detected on a pool,
105 the top(1) utility should be used to check for the following
106 pattern of CPU usage on all the CPUs associated with the given
107 nfsd thread pool.
108
109 - %us ~= 0 (as you're *NOT* running applications on your NFS server)
110
111 - %wa ~= 0
112
113 - %id ~= 0
114
115 - %sy + %hi + %si ~= 100
116
117 If this pattern is seen, configuring more nfsd threads will *not*
118 improve the performance of the workload. If this pattern is not
119 seen, then something more subtle is wrong.
120
121threads-timedout
122 Counts how many times an nfsd thread triggered an idle timeout,
123 i.e. was not woken to handle any incoming network packets for
124 some time.
125
126 This statistic counts a circumstance where there are more nfsd
127 threads configured than can be used by the NFS workload. This is
128 a clue that the number of nfsd threads can be reduced without
129 affecting performance. Unfortunately, it's only a clue and not
130 a strong indication, for a couple of reasons:
131
132 - Currently the rate at which the counter is incremented is quite
133 slow; the idle timeout is 60 minutes. Unless the NFS workload
134 remains constant for hours at a time, this counter is unlikely
135 to be providing information that is still useful.
136
137 - It is usually a wise policy to provide some slack,
138 i.e. configure a few more nfsds than are currently needed,
139 to allow for future spikes in load.
140
141
142Note that incoming packets on NFS transports will be dealt with in
143one of three ways. An nfsd thread can be woken (threads-woken counts
144this case), or the transport can be enqueued for later attention
145(sockets-enqueued counts this case), or the packet can be temporarily
146deferred because the transport is currently being used by an nfsd
147thread. This last case is not very interesting and is not explicitly
148counted, but can be inferred from the other counters thus:
149
150packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
151
152
153More
154----
155Descriptions of the other statistics files should go here.
156
157
158Greg Banks <gnb@sgi.com>
15926 Mar 2009
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
new file mode 100644
index 000000000000..05d81cbcb2e1
--- /dev/null
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -0,0 +1,161 @@
1NFSv4.1 Server Implementation
2
3Server support for minorversion 1 can be controlled using the
4/proc/fs/nfsd/versions control file. The string output returned
5by reading this file will contain either "+4.1" or "-4.1"
6correspondingly.
7
8Currently, server support for minorversion 1 is disabled by default.
9It can be enabled at run time by writing the string "+4.1" to
10the /proc/fs/nfsd/versions control file. Note that to write this
11control file, the nfsd service must be taken down. Use your user-mode
12nfs-utils to set this up; see rpc.nfsd(8).
13
14The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
15on the latest NFSv4.1 Internet Draft:
16http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
17
18From the many new features in NFSv4.1 the current implementation
19focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
20"exactly once" semantics and better control and throttling of the
21resources allocated for each client.
22
23Other NFSv4.1 features, Parallel NFS operations in particular,
24are still under development out of tree.
25See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
26for more information.
27
28The table below, taken from the NFSv4.1 document, lists
29the operations that are mandatory to implement (REQ), optional
30(OPT), and NFSv4.0 operations that must not be implemented (MNI)
31in minor version 1. The first column indicates the operations that
32are not supported yet by the linux server implementation.
33
34The OPTIONAL features identified and their abbreviations are as follows:
35 pNFS Parallel NFS
36 FDELG File Delegations
37 DDELG Directory Delegations
38
39The following abbreviations indicate the linux server implementation status.
40 I Implemented NFSv4.1 operations.
41 NS Not Supported.
42 NS* unimplemented optional feature.
43 P pNFS features implemented out of tree.
44 PNS pNFS features that are not supported yet (out of tree).
45
46Operations
47
48 +----------------------+------------+--------------+----------------+
49 | Operation | REQ, REC, | Feature | Definition |
50 | | OPT, or | (REQ, REC, | |
51 | | MNI | or OPT) | |
52 +----------------------+------------+--------------+----------------+
53 | ACCESS | REQ | | Section 18.1 |
54NS | BACKCHANNEL_CTL | REQ | | Section 18.33 |
55NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
56 | CLOSE | REQ | | Section 18.2 |
57 | COMMIT | REQ | | Section 18.3 |
58 | CREATE | REQ | | Section 18.4 |
59I | CREATE_SESSION | REQ | | Section 18.36 |
60NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
61 | DELEGRETURN | OPT | FDELG, | Section 18.6 |
62 | | | DDELG, pNFS | |
63 | | | (REQ) | |
64NS | DESTROY_CLIENTID | REQ | | Section 18.50 |
65I | DESTROY_SESSION | REQ | | Section 18.37 |
66I | EXCHANGE_ID | REQ | | Section 18.35 |
67NS | FREE_STATEID | REQ | | Section 18.38 |
68 | GETATTR | REQ | | Section 18.7 |
69P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
70P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
71 | GETFH | REQ | | Section 18.8 |
72NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
73P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
74P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
75P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
76 | LINK | OPT | | Section 18.9 |
77 | LOCK | REQ | | Section 18.10 |
78 | LOCKT | REQ | | Section 18.11 |
79 | LOCKU | REQ | | Section 18.12 |
80 | LOOKUP | REQ | | Section 18.13 |
81 | LOOKUPP | REQ | | Section 18.14 |
82 | NVERIFY | REQ | | Section 18.15 |
83 | OPEN | REQ | | Section 18.16 |
84NS*| OPENATTR | OPT | | Section 18.17 |
85 | OPEN_CONFIRM | MNI | | N/A |
86 | OPEN_DOWNGRADE | REQ | | Section 18.18 |
87 | PUTFH | REQ | | Section 18.19 |
88 | PUTPUBFH | REQ | | Section 18.20 |
89 | PUTROOTFH | REQ | | Section 18.21 |
90 | READ | REQ | | Section 18.22 |
91 | READDIR | REQ | | Section 18.23 |
92 | READLINK | OPT | | Section 18.24 |
93NS | RECLAIM_COMPLETE | REQ | | Section 18.51 |
94 | RELEASE_LOCKOWNER | MNI | | N/A |
95 | REMOVE | REQ | | Section 18.25 |
96 | RENAME | REQ | | Section 18.26 |
97 | RENEW | MNI | | N/A |
98 | RESTOREFH | REQ | | Section 18.27 |
99 | SAVEFH | REQ | | Section 18.28 |
100 | SECINFO | REQ | | Section 18.29 |
101NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
102 | | | layout (REQ) | Section 13.12 |
103I | SEQUENCE | REQ | | Section 18.46 |
104 | SETATTR | REQ | | Section 18.30 |
105 | SETCLIENTID | MNI | | N/A |
106 | SETCLIENTID_CONFIRM | MNI | | N/A |
107NS | SET_SSV | REQ | | Section 18.47 |
108NS | TEST_STATEID | REQ | | Section 18.48 |
109 | VERIFY | REQ | | Section 18.31 |
110NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
111 | WRITE | REQ | | Section 18.32 |
112
113Callback Operations
114
115 +-------------------------+-----------+-------------+---------------+
116 | Operation | REQ, REC, | Feature | Definition |
117 | | OPT, or | (REQ, REC, | |
118 | | MNI | or OPT) | |
119 +-------------------------+-----------+-------------+---------------+
120 | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
121P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
122NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
123P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
124NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
125NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
126 | CB_RECALL | OPT | FDELG, | Section 20.2 |
127 | | | DDELG, pNFS | |
128 | | | (REQ) | |
129NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
130 | | | DDELG, pNFS | |
131 | | | (REQ) | |
132NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
133NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
134 | | | (REQ) | |
135I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
136 | | | DDELG, pNFS | |
137 | | | (REQ) | |
138NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
139 | | | DDELG, pNFS | |
140 | | | (REQ) | |
141 +-------------------------+-----------+-------------+---------------+
142
143Implementation notes:
144
145EXCHANGE_ID:
146* only SP4_NONE state protection supported
147* implementation ids are ignored
148
149CREATE_SESSION:
150* backchannel attributes are ignored
151* backchannel security parameters are ignored
152
153SEQUENCE:
154* no support for dynamic slot table renegotiation (optional)
155
156NFSv4.1 COMPOUND rules:
157The following cases aren't supported yet:
158* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
159 DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
160* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
161
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 763b78a6e9de..83ee34203bd7 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -426,8 +426,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
426 ret = nlm_granted; 426 ret = nlm_granted;
427 goto out; 427 goto out;
428 case -EAGAIN: 428 case -EAGAIN:
429 /*
430 * If this is a blocking request for an
431 * already pending lock request then we need
432 * to put it back on lockd's block list
433 */
434 if (wait)
435 break;
429 ret = nlm_lck_denied; 436 ret = nlm_lck_denied;
430 break; 437 goto out;
431 case FILE_LOCK_DEFERRED: 438 case FILE_LOCK_DEFERRED:
432 if (wait) 439 if (wait)
433 break; 440 break;
@@ -443,10 +450,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
443 goto out; 450 goto out;
444 } 451 }
445 452
446 ret = nlm_lck_denied;
447 if (!wait)
448 goto out;
449
450 ret = nlm_lck_blocked; 453 ret = nlm_lck_blocked;
451 454
452 /* Append to list of blocked */ 455 /* Append to list of blocked */
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 44d7d04dab95..503b9da159a3 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -1,6 +1,7 @@
1config NFSD 1config NFSD
2 tristate "NFS server support" 2 tristate "NFS server support"
3 depends on INET 3 depends on INET
4 depends on FILE_LOCKING
4 select LOCKD 5 select LOCKD
5 select SUNRPC 6 select SUNRPC
6 select EXPORTFS 7 select EXPORTFS
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9dbd2eb91281..7c9fe838f038 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -18,6 +18,7 @@
18#include <linux/unistd.h> 18#include <linux/unistd.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/major.h> 20#include <linux/major.h>
21#include <linux/magic.h>
21 22
22#include <linux/sunrpc/svc.h> 23#include <linux/sunrpc/svc.h>
23#include <linux/nfsd/nfsd.h> 24#include <linux/nfsd/nfsd.h>
@@ -202,6 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
202 struct nfsd3_writeres *resp) 203 struct nfsd3_writeres *resp)
203{ 204{
204 __be32 nfserr; 205 __be32 nfserr;
206 unsigned long cnt = argp->len;
205 207
206 dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", 208 dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n",
207 SVCFH_fmt(&argp->fh), 209 SVCFH_fmt(&argp->fh),
@@ -214,9 +216,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
214 nfserr = nfsd_write(rqstp, &resp->fh, NULL, 216 nfserr = nfsd_write(rqstp, &resp->fh, NULL,
215 argp->offset, 217 argp->offset,
216 rqstp->rq_vec, argp->vlen, 218 rqstp->rq_vec, argp->vlen,
217 argp->len, 219 &cnt,
218 &resp->committed); 220 &resp->committed);
219 resp->count = argp->count; 221 resp->count = cnt;
220 RETURN_STATUS(nfserr); 222 RETURN_STATUS(nfserr);
221} 223}
222 224
@@ -569,7 +571,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
569 struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; 571 struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
570 572
571 /* Note that we don't care for remote fs's here */ 573 /* Note that we don't care for remote fs's here */
572 if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { 574 if (sb->s_magic == MSDOS_SUPER_MAGIC) {
573 resp->f_properties = NFS3_FSF_BILLYBOY; 575 resp->f_properties = NFS3_FSF_BILLYBOY;
574 } 576 }
575 resp->f_maxfilesize = sb->s_maxbytes; 577 resp->f_maxfilesize = sb->s_maxbytes;
@@ -610,7 +612,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
610 resp->p_link_max = EXT2_LINK_MAX; 612 resp->p_link_max = EXT2_LINK_MAX;
611 resp->p_name_max = EXT2_NAME_LEN; 613 resp->p_name_max = EXT2_NAME_LEN;
612 break; 614 break;
613 case 0x4d44: /* MSDOS_SUPER_MAGIC */ 615 case MSDOS_SUPER_MAGIC:
614 resp->p_case_insensitive = 1; 616 resp->p_case_insensitive = 1;
615 resp->p_case_preserving = 0; 617 resp->p_case_preserving = 0;
616 break; 618 break;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c464181b5994..290289bd44f7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -218,7 +218,7 @@ static int
218encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) 218encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
219{ 219{
220 __be32 *p; 220 __be32 *p;
221 int len = cb_rec->cbr_fhlen; 221 int len = cb_rec->cbr_fh.fh_size;
222 222
223 RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); 223 RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
224 WRITE32(OP_CB_RECALL); 224 WRITE32(OP_CB_RECALL);
@@ -226,7 +226,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
226 WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); 226 WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t));
227 WRITE32(cb_rec->cbr_trunc); 227 WRITE32(cb_rec->cbr_trunc);
228 WRITE32(len); 228 WRITE32(len);
229 WRITEMEM(cb_rec->cbr_fhval, len); 229 WRITEMEM(&cb_rec->cbr_fh.fh_base, len);
230 return 0; 230 return 0;
231} 231}
232 232
@@ -361,9 +361,8 @@ static struct rpc_program cb_program = {
361/* Reference counting, callback cleanup, etc., all look racy as heck. 361/* Reference counting, callback cleanup, etc., all look racy as heck.
362 * And why is cb_set an atomic? */ 362 * And why is cb_set an atomic? */
363 363
364static int do_probe_callback(void *data) 364static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
365{ 365{
366 struct nfs4_client *clp = data;
367 struct sockaddr_in addr; 366 struct sockaddr_in addr;
368 struct nfs4_callback *cb = &clp->cl_callback; 367 struct nfs4_callback *cb = &clp->cl_callback;
369 struct rpc_timeout timeparms = { 368 struct rpc_timeout timeparms = {
@@ -384,17 +383,10 @@ static int do_probe_callback(void *data)
384 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 383 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
385 .client_name = clp->cl_principal, 384 .client_name = clp->cl_principal,
386 }; 385 };
387 struct rpc_message msg = {
388 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
389 .rpc_argp = clp,
390 };
391 struct rpc_clnt *client; 386 struct rpc_clnt *client;
392 int status;
393 387
394 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) { 388 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
395 status = nfserr_cb_path_down; 389 return ERR_PTR(-EINVAL);
396 goto out_err;
397 }
398 390
399 /* Initialize address */ 391 /* Initialize address */
400 memset(&addr, 0, sizeof(addr)); 392 memset(&addr, 0, sizeof(addr));
@@ -404,9 +396,29 @@ static int do_probe_callback(void *data)
404 396
405 /* Create RPC client */ 397 /* Create RPC client */
406 client = rpc_create(&args); 398 client = rpc_create(&args);
399 if (IS_ERR(client))
400 dprintk("NFSD: couldn't create callback client: %ld\n",
401 PTR_ERR(client));
402 return client;
403
404}
405
406static int do_probe_callback(void *data)
407{
408 struct nfs4_client *clp = data;
409 struct nfs4_callback *cb = &clp->cl_callback;
410 struct rpc_message msg = {
411 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
412 .rpc_argp = clp,
413 };
414 struct rpc_clnt *client;
415 int status;
416
417 client = setup_callback_client(clp);
407 if (IS_ERR(client)) { 418 if (IS_ERR(client)) {
408 dprintk("NFSD: couldn't create callback client\n");
409 status = PTR_ERR(client); 419 status = PTR_ERR(client);
420 dprintk("NFSD: couldn't create callback client: %d\n",
421 status);
410 goto out_err; 422 goto out_err;
411 } 423 }
412 424
@@ -422,10 +434,10 @@ static int do_probe_callback(void *data)
422out_release_client: 434out_release_client:
423 rpc_shutdown_client(client); 435 rpc_shutdown_client(client);
424out_err: 436out_err:
425 dprintk("NFSD: warning: no callback path to client %.*s\n", 437 dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
426 (int)clp->cl_name.len, clp->cl_name.data); 438 (int)clp->cl_name.len, clp->cl_name.data, status);
427 put_nfs4_client(clp); 439 put_nfs4_client(clp);
428 return status; 440 return 0;
429} 441}
430 442
431/* 443/*
@@ -451,7 +463,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
451 463
452/* 464/*
453 * called with dp->dl_count inc'ed. 465 * called with dp->dl_count inc'ed.
454 * nfs4_lock_state() may or may not have been called.
455 */ 466 */
456void 467void
457nfsd4_cb_recall(struct nfs4_delegation *dp) 468nfsd4_cb_recall(struct nfs4_delegation *dp)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 9fa60a3ad48c..b2883e9c6381 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -93,6 +93,21 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
93 open->op_truncate = 0; 93 open->op_truncate = 0;
94 94
95 if (open->op_create) { 95 if (open->op_create) {
96 /* FIXME: check session persistence and pnfs flags.
97 * The nfsv4.1 spec requires the following semantics:
98 *
99 * Persistent | pNFS | Server REQUIRED | Client Allowed
100 * Reply Cache | server | |
101 * -------------+--------+-----------------+--------------------
102 * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1
103 * | | | (SHOULD)
104 * | | and EXCLUSIVE4 | or EXCLUSIVE4
105 * | | | (SHOULD NOT)
106 * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1
107 * yes | no | GUARDED4 | GUARDED4
108 * yes | yes | GUARDED4 | GUARDED4
109 */
110
96 /* 111 /*
97 * Note: create modes (UNCHECKED,GUARDED...) are the same 112 * Note: create modes (UNCHECKED,GUARDED...) are the same
98 * in NFSv4 as in v3. 113 * in NFSv4 as in v3.
@@ -103,11 +118,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
103 (u32 *)open->op_verf.data, 118 (u32 *)open->op_verf.data,
104 &open->op_truncate, &created); 119 &open->op_truncate, &created);
105 120
106 /* If we ever decide to use different attrs to store the 121 /*
107 * verifier in nfsd_create_v3, then we'll need to change this 122 * Following rfc 3530 14.2.16, use the returned bitmask
123 * to indicate which attributes we used to store the
124 * verifier:
108 */ 125 */
109 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) 126 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
110 open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | 127 open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
111 FATTR4_WORD1_TIME_MODIFY); 128 FATTR4_WORD1_TIME_MODIFY);
112 } else { 129 } else {
113 status = nfsd_lookup(rqstp, current_fh, 130 status = nfsd_lookup(rqstp, current_fh,
@@ -118,13 +135,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
118 goto out; 135 goto out;
119 136
120 set_change_info(&open->op_cinfo, current_fh); 137 set_change_info(&open->op_cinfo, current_fh);
121
122 /* set reply cache */
123 fh_dup2(current_fh, &resfh); 138 fh_dup2(current_fh, &resfh);
124 open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size;
125 memcpy(open->op_stateowner->so_replay.rp_openfh,
126 &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
127 139
140 /* set reply cache */
141 fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh,
142 &resfh.fh_handle);
128 if (!created) 143 if (!created)
129 status = do_open_permission(rqstp, current_fh, open, 144 status = do_open_permission(rqstp, current_fh, open,
130 NFSD_MAY_NOP); 145 NFSD_MAY_NOP);
@@ -150,10 +165,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
150 memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); 165 memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
151 166
152 /* set replay cache */ 167 /* set replay cache */
153 open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; 168 fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh,
154 memcpy(open->op_stateowner->so_replay.rp_openfh, 169 &current_fh->fh_handle);
155 &current_fh->fh_handle.fh_base,
156 current_fh->fh_handle.fh_size);
157 170
158 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && 171 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
159 (open->op_iattr.ia_size == 0); 172 (open->op_iattr.ia_size == 0);
@@ -164,12 +177,23 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
164 return status; 177 return status;
165} 178}
166 179
180static void
181copy_clientid(clientid_t *clid, struct nfsd4_session *session)
182{
183 struct nfsd4_sessionid *sid =
184 (struct nfsd4_sessionid *)session->se_sessionid.data;
185
186 clid->cl_boot = sid->clientid.cl_boot;
187 clid->cl_id = sid->clientid.cl_id;
188}
167 189
168static __be32 190static __be32
169nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 191nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
170 struct nfsd4_open *open) 192 struct nfsd4_open *open)
171{ 193{
172 __be32 status; 194 __be32 status;
195 struct nfsd4_compoundres *resp;
196
173 dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", 197 dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
174 (int)open->op_fname.len, open->op_fname.data, 198 (int)open->op_fname.len, open->op_fname.data,
175 open->op_stateowner); 199 open->op_stateowner);
@@ -178,16 +202,19 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
178 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) 202 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
179 return nfserr_inval; 203 return nfserr_inval;
180 204
205 if (nfsd4_has_session(cstate))
206 copy_clientid(&open->op_clientid, cstate->session);
207
181 nfs4_lock_state(); 208 nfs4_lock_state();
182 209
183 /* check seqid for replay. set nfs4_owner */ 210 /* check seqid for replay. set nfs4_owner */
184 status = nfsd4_process_open1(open); 211 resp = rqstp->rq_resp;
212 status = nfsd4_process_open1(&resp->cstate, open);
185 if (status == nfserr_replay_me) { 213 if (status == nfserr_replay_me) {
186 struct nfs4_replay *rp = &open->op_stateowner->so_replay; 214 struct nfs4_replay *rp = &open->op_stateowner->so_replay;
187 fh_put(&cstate->current_fh); 215 fh_put(&cstate->current_fh);
188 cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; 216 fh_copy_shallow(&cstate->current_fh.fh_handle,
189 memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, 217 &rp->rp_openfh);
190 rp->rp_openfh_len);
191 status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); 218 status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
192 if (status) 219 if (status)
193 dprintk("nfsd4_open: replay failed" 220 dprintk("nfsd4_open: replay failed"
@@ -209,10 +236,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
209 236
210 switch (open->op_claim_type) { 237 switch (open->op_claim_type) {
211 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 238 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
212 status = nfserr_inval;
213 if (open->op_create)
214 goto out;
215 /* fall through */
216 case NFS4_OPEN_CLAIM_NULL: 239 case NFS4_OPEN_CLAIM_NULL:
217 /* 240 /*
218 * (1) set CURRENT_FH to the file being opened, 241 * (1) set CURRENT_FH to the file being opened,
@@ -455,8 +478,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
455 if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) 478 if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
456 return nfserr_inval; 479 return nfserr_inval;
457 480
458 getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; 481 getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
459 getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; 482 getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
483 getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
460 484
461 getattr->ga_fhp = &cstate->current_fh; 485 getattr->ga_fhp = &cstate->current_fh;
462 return nfs_ok; 486 return nfs_ok;
@@ -520,9 +544,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
520 544
521 nfs4_lock_state(); 545 nfs4_lock_state();
522 /* check stateid */ 546 /* check stateid */
523 if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, 547 if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid,
524 &read->rd_stateid, 548 RD_STATE, &read->rd_filp))) {
525 CHECK_FH | RD_STATE, &read->rd_filp))) {
526 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 549 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
527 goto out; 550 goto out;
528 } 551 }
@@ -548,8 +571,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
548 if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) 571 if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
549 return nfserr_inval; 572 return nfserr_inval;
550 573
551 readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; 574 readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
552 readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; 575 readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
576 readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
553 577
554 if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || 578 if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) ||
555 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) 579 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
@@ -653,8 +677,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
653 677
654 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 678 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
655 nfs4_lock_state(); 679 nfs4_lock_state();
656 status = nfs4_preprocess_stateid_op(&cstate->current_fh, 680 status = nfs4_preprocess_stateid_op(cstate,
657 &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); 681 &setattr->sa_stateid, WR_STATE, NULL);
658 nfs4_unlock_state(); 682 nfs4_unlock_state();
659 if (status) { 683 if (status) {
660 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); 684 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
@@ -685,6 +709,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
685 struct file *filp = NULL; 709 struct file *filp = NULL;
686 u32 *p; 710 u32 *p;
687 __be32 status = nfs_ok; 711 __be32 status = nfs_ok;
712 unsigned long cnt;
688 713
689 /* no need to check permission - this will be done in nfsd_write() */ 714 /* no need to check permission - this will be done in nfsd_write() */
690 715
@@ -692,8 +717,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
692 return nfserr_inval; 717 return nfserr_inval;
693 718
694 nfs4_lock_state(); 719 nfs4_lock_state();
695 status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, 720 status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp);
696 CHECK_FH | WR_STATE, &filp);
697 if (filp) 721 if (filp)
698 get_file(filp); 722 get_file(filp);
699 nfs4_unlock_state(); 723 nfs4_unlock_state();
@@ -703,7 +727,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
703 return status; 727 return status;
704 } 728 }
705 729
706 write->wr_bytes_written = write->wr_buflen; 730 cnt = write->wr_buflen;
707 write->wr_how_written = write->wr_stable_how; 731 write->wr_how_written = write->wr_stable_how;
708 p = (u32 *)write->wr_verifier.data; 732 p = (u32 *)write->wr_verifier.data;
709 *p++ = nfssvc_boot.tv_sec; 733 *p++ = nfssvc_boot.tv_sec;
@@ -711,10 +735,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
711 735
712 status = nfsd_write(rqstp, &cstate->current_fh, filp, 736 status = nfsd_write(rqstp, &cstate->current_fh, filp,
713 write->wr_offset, rqstp->rq_vec, write->wr_vlen, 737 write->wr_offset, rqstp->rq_vec, write->wr_vlen,
714 write->wr_buflen, &write->wr_how_written); 738 &cnt, &write->wr_how_written);
715 if (filp) 739 if (filp)
716 fput(filp); 740 fput(filp);
717 741
742 write->wr_bytes_written = cnt;
743
718 if (status == nfserr_symlink) 744 if (status == nfserr_symlink)
719 status = nfserr_inval; 745 status = nfserr_inval;
720 return status; 746 return status;
@@ -737,8 +763,9 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
737 if (status) 763 if (status)
738 return status; 764 return status;
739 765
740 if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) 766 if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion))
741 || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) 767 || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion))
768 || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
742 return nfserr_attrnotsupp; 769 return nfserr_attrnotsupp;
743 if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) 770 if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
744 || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) 771 || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
@@ -766,7 +793,8 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
766 if (status) 793 if (status)
767 goto out_kfree; 794 goto out_kfree;
768 795
769 p = buf + 3; 796 /* skip bitmap */
797 p = buf + 1 + ntohl(buf[0]);
770 status = nfserr_not_same; 798 status = nfserr_not_same;
771 if (ntohl(*p++) != verify->ve_attrlen) 799 if (ntohl(*p++) != verify->ve_attrlen)
772 goto out_kfree; 800 goto out_kfree;
@@ -813,39 +841,17 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
813 nfsdstats.nfs4_opcount[opnum]++; 841 nfsdstats.nfs4_opcount[opnum]++;
814} 842}
815 843
816static void cstate_free(struct nfsd4_compound_state *cstate)
817{
818 if (cstate == NULL)
819 return;
820 fh_put(&cstate->current_fh);
821 fh_put(&cstate->save_fh);
822 BUG_ON(cstate->replay_owner);
823 kfree(cstate);
824}
825
826static struct nfsd4_compound_state *cstate_alloc(void)
827{
828 struct nfsd4_compound_state *cstate;
829
830 cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL);
831 if (cstate == NULL)
832 return NULL;
833 fh_init(&cstate->current_fh, NFS4_FHSIZE);
834 fh_init(&cstate->save_fh, NFS4_FHSIZE);
835 cstate->replay_owner = NULL;
836 return cstate;
837}
838
839typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, 844typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
840 void *); 845 void *);
/*
 * Per-operation flags kept in nfsd4_operation.op_flags.
 *
 * Each flag owns a distinct bit so that flags can be OR-ed together in
 * the nfsd4_ops[] table and tested independently with '&'.
 */
enum nfsd4_op_flags {
	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
	ALLOWED_AS_FIRST_OP = 1 << 2,	/* ops required first in compound */
};
841 851
842struct nfsd4_operation { 852struct nfsd4_operation {
843 nfsd4op_func op_func; 853 nfsd4op_func op_func;
844 u32 op_flags; 854 u32 op_flags;
845/* Most ops require a valid current filehandle; a few don't: */
846#define ALLOWED_WITHOUT_FH 1
847/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
848#define ALLOWED_ON_ABSENT_FS 2
849 char *op_name; 855 char *op_name;
850}; 856};
851 857
@@ -854,6 +860,51 @@ static struct nfsd4_operation nfsd4_ops[];
854static const char *nfsd4_op_name(unsigned opnum); 860static const char *nfsd4_op_name(unsigned opnum);
855 861
856/* 862/*
863 * This is a replay of a compound for which no cache entry pages
864 * were used. Encode the sequence operation, and if cachethis is FALSE
865 * encode the uncache rep error on the next operation.
866 */
867static __be32
868nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
869 struct nfsd4_compoundres *resp)
870{
871 struct nfsd4_op *op;
872
873 dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
874 resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
875
876 /* Encode the replayed sequence operation */
877 BUG_ON(resp->opcnt != 1);
878 op = &args->ops[resp->opcnt - 1];
879 nfsd4_encode_operation(resp, op);
880
881 /*return nfserr_retry_uncached_rep in next operation. */
882 if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
883 op = &args->ops[resp->opcnt++];
884 op->status = nfserr_retry_uncached_rep;
885 nfsd4_encode_operation(resp, op);
886 }
887 return op->status;
888}
889
890/*
891 * Enforce NFSv4.1 COMPOUND ordering rules.
892 *
893 * TODO:
894 * - enforce NFS4ERR_NOT_ONLY_OP,
895 * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
896 */
897static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
898{
899 if (args->minorversion && args->opcnt > 0) {
900 struct nfsd4_op *op = &args->ops[0];
901 return (op->status == nfserr_op_illegal) ||
902 (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
903 }
904 return true;
905}
906
907/*
857 * COMPOUND call. 908 * COMPOUND call.
858 */ 909 */
859static __be32 910static __be32
@@ -863,12 +914,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
863{ 914{
864 struct nfsd4_op *op; 915 struct nfsd4_op *op;
865 struct nfsd4_operation *opdesc; 916 struct nfsd4_operation *opdesc;
866 struct nfsd4_compound_state *cstate = NULL; 917 struct nfsd4_compound_state *cstate = &resp->cstate;
867 int slack_bytes; 918 int slack_bytes;
868 __be32 status; 919 __be32 status;
869 920
870 resp->xbuf = &rqstp->rq_res; 921 resp->xbuf = &rqstp->rq_res;
871 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; 922 resp->p = rqstp->rq_res.head[0].iov_base +
923 rqstp->rq_res.head[0].iov_len;
872 resp->tagp = resp->p; 924 resp->tagp = resp->p;
873 /* reserve space for: taglen, tag, and opcnt */ 925 /* reserve space for: taglen, tag, and opcnt */
874 resp->p += 2 + XDR_QUADLEN(args->taglen); 926 resp->p += 2 + XDR_QUADLEN(args->taglen);
@@ -877,18 +929,25 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
877 resp->tag = args->tag; 929 resp->tag = args->tag;
878 resp->opcnt = 0; 930 resp->opcnt = 0;
879 resp->rqstp = rqstp; 931 resp->rqstp = rqstp;
932 resp->cstate.minorversion = args->minorversion;
933 resp->cstate.replay_owner = NULL;
934 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
935 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
936 /* Use the deferral mechanism only for NFSv4.0 compounds */
937 rqstp->rq_usedeferral = (args->minorversion == 0);
880 938
881 /* 939 /*
882 * According to RFC3010, this takes precedence over all other errors. 940 * According to RFC3010, this takes precedence over all other errors.
883 */ 941 */
884 status = nfserr_minor_vers_mismatch; 942 status = nfserr_minor_vers_mismatch;
885 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) 943 if (args->minorversion > nfsd_supported_minorversion)
886 goto out; 944 goto out;
887 945
888 status = nfserr_resource; 946 if (!nfs41_op_ordering_ok(args)) {
889 cstate = cstate_alloc(); 947 op = &args->ops[0];
890 if (cstate == NULL) 948 op->status = nfserr_sequence_pos;
891 goto out; 949 goto encode_op;
950 }
892 951
893 status = nfs_ok; 952 status = nfs_ok;
894 while (!status && resp->opcnt < args->opcnt) { 953 while (!status && resp->opcnt < args->opcnt) {
@@ -897,7 +956,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
897 dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", 956 dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
898 resp->opcnt, args->opcnt, op->opnum, 957 resp->opcnt, args->opcnt, op->opnum,
899 nfsd4_op_name(op->opnum)); 958 nfsd4_op_name(op->opnum));
900
901 /* 959 /*
902 * The XDR decode routines may have pre-set op->status; 960 * The XDR decode routines may have pre-set op->status;
903 * for example, if there is a miscellaneous XDR error 961 * for example, if there is a miscellaneous XDR error
@@ -938,6 +996,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
938 BUG_ON(op->status == nfs_ok); 996 BUG_ON(op->status == nfs_ok);
939 997
940encode_op: 998encode_op:
999 /* Only from SEQUENCE or CREATE_SESSION */
1000 if (resp->cstate.status == nfserr_replay_cache) {
1001 dprintk("%s NFS4.1 replay from cache\n", __func__);
1002 if (nfsd4_not_cached(resp))
1003 status = nfsd4_enc_uncached_replay(args, resp);
1004 else
1005 status = op->status;
1006 goto out;
1007 }
941 if (op->status == nfserr_replay_me) { 1008 if (op->status == nfserr_replay_me) {
942 op->replay = &cstate->replay_owner->so_replay; 1009 op->replay = &cstate->replay_owner->so_replay;
943 nfsd4_encode_replay(resp, op); 1010 nfsd4_encode_replay(resp, op);
@@ -961,15 +1028,24 @@ encode_op:
961 1028
962 nfsd4_increment_op_stats(op->opnum); 1029 nfsd4_increment_op_stats(op->opnum);
963 } 1030 }
1031 if (!rqstp->rq_usedeferral && status == nfserr_dropit) {
1032 dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__);
1033 status = nfserr_jukebox;
1034 }
964 1035
965 cstate_free(cstate); 1036 resp->cstate.status = status;
1037 fh_put(&resp->cstate.current_fh);
1038 fh_put(&resp->cstate.save_fh);
1039 BUG_ON(resp->cstate.replay_owner);
966out: 1040out:
967 nfsd4_release_compoundargs(args); 1041 nfsd4_release_compoundargs(args);
1042 /* Reset deferral mechanism for RPC deferrals */
1043 rqstp->rq_usedeferral = 1;
968 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 1044 dprintk("nfsv4 compound returned %d\n", ntohl(status));
969 return status; 1045 return status;
970} 1046}
971 1047
972static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { 1048static struct nfsd4_operation nfsd4_ops[] = {
973 [OP_ACCESS] = { 1049 [OP_ACCESS] = {
974 .op_func = (nfsd4op_func)nfsd4_access, 1050 .op_func = (nfsd4op_func)nfsd4_access,
975 .op_name = "OP_ACCESS", 1051 .op_name = "OP_ACCESS",
@@ -1045,7 +1121,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
1045 .op_name = "OP_PUTFH", 1121 .op_name = "OP_PUTFH",
1046 }, 1122 },
1047 [OP_PUTPUBFH] = { 1123 [OP_PUTPUBFH] = {
1048 /* unsupported, just for future reference: */ 1124 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1049 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1125 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1050 .op_name = "OP_PUTPUBFH", 1126 .op_name = "OP_PUTPUBFH",
1051 }, 1127 },
@@ -1119,6 +1195,28 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
1119 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1195 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1120 .op_name = "OP_RELEASE_LOCKOWNER", 1196 .op_name = "OP_RELEASE_LOCKOWNER",
1121 }, 1197 },
1198
1199 /* NFSv4.1 operations */
1200 [OP_EXCHANGE_ID] = {
1201 .op_func = (nfsd4op_func)nfsd4_exchange_id,
1202 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1203 .op_name = "OP_EXCHANGE_ID",
1204 },
1205 [OP_CREATE_SESSION] = {
1206 .op_func = (nfsd4op_func)nfsd4_create_session,
1207 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1208 .op_name = "OP_CREATE_SESSION",
1209 },
1210 [OP_DESTROY_SESSION] = {
1211 .op_func = (nfsd4op_func)nfsd4_destroy_session,
1212 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1213 .op_name = "OP_DESTROY_SESSION",
1214 },
1215 [OP_SEQUENCE] = {
1216 .op_func = (nfsd4op_func)nfsd4_sequence,
1217 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1218 .op_name = "OP_SEQUENCE",
1219 },
1122}; 1220};
1123 1221
1124static const char *nfsd4_op_name(unsigned opnum) 1222static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 74f7b67567fd..3444c0052a87 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -182,36 +182,26 @@ out_unlock:
182 182
183typedef int (recdir_func)(struct dentry *, struct dentry *); 183typedef int (recdir_func)(struct dentry *, struct dentry *);
184 184
185struct dentry_list { 185struct name_list {
186 struct dentry *dentry; 186 char name[HEXDIR_LEN];
187 struct list_head list; 187 struct list_head list;
188}; 188};
189 189
190struct dentry_list_arg {
191 struct list_head dentries;
192 struct dentry *parent;
193};
194
195static int 190static int
196nfsd4_build_dentrylist(void *arg, const char *name, int namlen, 191nfsd4_build_namelist(void *arg, const char *name, int namlen,
197 loff_t offset, u64 ino, unsigned int d_type) 192 loff_t offset, u64 ino, unsigned int d_type)
198{ 193{
199 struct dentry_list_arg *dla = arg; 194 struct list_head *names = arg;
200 struct list_head *dentries = &dla->dentries; 195 struct name_list *entry;
201 struct dentry *parent = dla->parent;
202 struct dentry *dentry;
203 struct dentry_list *child;
204 196
205 if (name && isdotent(name, namlen)) 197 if (namlen != HEXDIR_LEN - 1)
206 return 0; 198 return 0;
207 dentry = lookup_one_len(name, parent, namlen); 199 entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
208 if (IS_ERR(dentry)) 200 if (entry == NULL)
209 return PTR_ERR(dentry);
210 child = kmalloc(sizeof(*child), GFP_KERNEL);
211 if (child == NULL)
212 return -ENOMEM; 201 return -ENOMEM;
213 child->dentry = dentry; 202 memcpy(entry->name, name, HEXDIR_LEN - 1);
214 list_add(&child->list, dentries); 203 entry->name[HEXDIR_LEN - 1] = '\0';
204 list_add(&entry->list, names);
215 return 0; 205 return 0;
216} 206}
217 207
@@ -220,11 +210,9 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
220{ 210{
221 const struct cred *original_cred; 211 const struct cred *original_cred;
222 struct file *filp; 212 struct file *filp;
223 struct dentry_list_arg dla = { 213 LIST_HEAD(names);
224 .parent = dir, 214 struct name_list *entry;
225 }; 215 struct dentry *dentry;
226 struct list_head *dentries = &dla.dentries;
227 struct dentry_list *child;
228 int status; 216 int status;
229 217
230 if (!rec_dir_init) 218 if (!rec_dir_init)
@@ -233,31 +221,34 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
233 status = nfs4_save_creds(&original_cred); 221 status = nfs4_save_creds(&original_cred);
234 if (status < 0) 222 if (status < 0)
235 return status; 223 return status;
236 INIT_LIST_HEAD(dentries);
237 224
238 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, 225 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
239 current_cred()); 226 current_cred());
240 status = PTR_ERR(filp); 227 status = PTR_ERR(filp);
241 if (IS_ERR(filp)) 228 if (IS_ERR(filp))
242 goto out; 229 goto out;
243 INIT_LIST_HEAD(dentries); 230 status = vfs_readdir(filp, nfsd4_build_namelist, &names);
244 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
245 fput(filp); 231 fput(filp);
246 while (!list_empty(dentries)) { 232 while (!list_empty(&names)) {
247 child = list_entry(dentries->next, struct dentry_list, list); 233 entry = list_entry(names.next, struct name_list, list);
248 status = f(dir, child->dentry); 234
235 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
236 if (IS_ERR(dentry)) {
237 status = PTR_ERR(dentry);
238 goto out;
239 }
240 status = f(dir, dentry);
241 dput(dentry);
249 if (status) 242 if (status)
250 goto out; 243 goto out;
251 list_del(&child->list); 244 list_del(&entry->list);
252 dput(child->dentry); 245 kfree(entry);
253 kfree(child);
254 } 246 }
255out: 247out:
256 while (!list_empty(dentries)) { 248 while (!list_empty(&names)) {
257 child = list_entry(dentries->next, struct dentry_list, list); 249 entry = list_entry(names.next, struct name_list, list);
258 list_del(&child->list); 250 list_del(&entry->list);
259 dput(child->dentry); 251 kfree(entry);
260 kfree(child);
261 } 252 }
262 nfs4_reset_creds(original_cred); 253 nfs4_reset_creds(original_cred);
263 return status; 254 return status;
@@ -353,7 +344,8 @@ purge_old(struct dentry *parent, struct dentry *child)
353{ 344{
354 int status; 345 int status;
355 346
356 if (nfs4_has_reclaimed_state(child->d_name.name)) 347 /* note: we currently use this path only for minorversion 0 */
348 if (nfs4_has_reclaimed_state(child->d_name.name, false))
357 return 0; 349 return 0;
358 350
359 status = nfsd4_clear_clid_dir(parent, child); 351 status = nfsd4_clear_clid_dir(parent, child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b6f60f48e94b..c65a27b76a9d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -68,6 +68,7 @@ static u32 current_delegid = 1;
68static u32 nfs4_init; 68static u32 nfs4_init;
69static stateid_t zerostateid; /* bits all 0 */ 69static stateid_t zerostateid; /* bits all 0 */
70static stateid_t onestateid; /* bits all 1 */ 70static stateid_t onestateid; /* bits all 1 */
71static u64 current_sessionid = 1;
71 72
72#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) 73#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
73#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) 74#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
@@ -75,18 +76,21 @@ static stateid_t onestateid; /* bits all 1 */
75/* forward declarations */ 76/* forward declarations */
76static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); 77static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
77static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); 78static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
78static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
79static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; 79static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
80static void nfs4_set_recdir(char *recdir); 80static void nfs4_set_recdir(char *recdir);
81 81
82/* Locking: 82/* Locking: */
83 * 83
84 * client_mutex: 84/* Currently used for almost all code touching nfsv4 state: */
85 * protects clientid_hashtbl[], clientstr_hashtbl[],
86 * unconfstr_hashtbl[], uncofid_hashtbl[].
87 */
88static DEFINE_MUTEX(client_mutex); 85static DEFINE_MUTEX(client_mutex);
89 86
87/*
88 * Currently used for the del_recall_lru and file hash table. In an
89 * effort to decrease the scope of the client_mutex, this spinlock may
90 * eventually cover more:
91 */
92static DEFINE_SPINLOCK(recall_lock);
93
90static struct kmem_cache *stateowner_slab = NULL; 94static struct kmem_cache *stateowner_slab = NULL;
91static struct kmem_cache *file_slab = NULL; 95static struct kmem_cache *file_slab = NULL;
92static struct kmem_cache *stateid_slab = NULL; 96static struct kmem_cache *stateid_slab = NULL;
@@ -117,37 +121,23 @@ opaque_hashval(const void *ptr, int nbytes)
117 return x; 121 return x;
118} 122}
119 123
120/* forward declarations */
121static void release_stateowner(struct nfs4_stateowner *sop);
122static void release_stateid(struct nfs4_stateid *stp, int flags);
123
124/*
125 * Delegation state
126 */
127
128/* recall_lock protects the del_recall_lru */
129static DEFINE_SPINLOCK(recall_lock);
130static struct list_head del_recall_lru; 124static struct list_head del_recall_lru;
131 125
132static void
133free_nfs4_file(struct kref *kref)
134{
135 struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
136 list_del(&fp->fi_hash);
137 iput(fp->fi_inode);
138 kmem_cache_free(file_slab, fp);
139}
140
141static inline void 126static inline void
142put_nfs4_file(struct nfs4_file *fi) 127put_nfs4_file(struct nfs4_file *fi)
143{ 128{
144 kref_put(&fi->fi_ref, free_nfs4_file); 129 if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
130 list_del(&fi->fi_hash);
131 spin_unlock(&recall_lock);
132 iput(fi->fi_inode);
133 kmem_cache_free(file_slab, fi);
134 }
145} 135}
146 136
147static inline void 137static inline void
148get_nfs4_file(struct nfs4_file *fi) 138get_nfs4_file(struct nfs4_file *fi)
149{ 139{
150 kref_get(&fi->fi_ref); 140 atomic_inc(&fi->fi_ref);
151} 141}
152 142
153static int num_delegations; 143static int num_delegations;
@@ -220,9 +210,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
220 dp->dl_stateid.si_stateownerid = current_delegid++; 210 dp->dl_stateid.si_stateownerid = current_delegid++;
221 dp->dl_stateid.si_fileid = 0; 211 dp->dl_stateid.si_fileid = 0;
222 dp->dl_stateid.si_generation = 0; 212 dp->dl_stateid.si_generation = 0;
223 dp->dl_fhlen = current_fh->fh_handle.fh_size; 213 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
224 memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
225 current_fh->fh_handle.fh_size);
226 dp->dl_time = 0; 214 dp->dl_time = 0;
227 atomic_set(&dp->dl_count, 1); 215 atomic_set(&dp->dl_count, 1);
228 list_add(&dp->dl_perfile, &fp->fi_delegations); 216 list_add(&dp->dl_perfile, &fp->fi_delegations);
@@ -311,6 +299,291 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
311static struct list_head client_lru; 299static struct list_head client_lru;
312static struct list_head close_lru; 300static struct list_head close_lru;
313 301
302static void unhash_generic_stateid(struct nfs4_stateid *stp)
303{
304 list_del(&stp->st_hash);
305 list_del(&stp->st_perfile);
306 list_del(&stp->st_perstateowner);
307}
308
309static void free_generic_stateid(struct nfs4_stateid *stp)
310{
311 put_nfs4_file(stp->st_file);
312 kmem_cache_free(stateid_slab, stp);
313}
314
315static void release_lock_stateid(struct nfs4_stateid *stp)
316{
317 unhash_generic_stateid(stp);
318 locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner);
319 free_generic_stateid(stp);
320}
321
322static void unhash_lockowner(struct nfs4_stateowner *sop)
323{
324 struct nfs4_stateid *stp;
325
326 list_del(&sop->so_idhash);
327 list_del(&sop->so_strhash);
328 list_del(&sop->so_perstateid);
329 while (!list_empty(&sop->so_stateids)) {
330 stp = list_first_entry(&sop->so_stateids,
331 struct nfs4_stateid, st_perstateowner);
332 release_lock_stateid(stp);
333 }
334}
335
/* Unhash a lock owner (releasing its stateids) and put a reference to it. */
static void release_lockowner(struct nfs4_stateowner *sop)
{
	unhash_lockowner(sop);
	nfs4_put_stateowner(sop);
}
341
342static void
343release_stateid_lockowners(struct nfs4_stateid *open_stp)
344{
345 struct nfs4_stateowner *lock_sop;
346
347 while (!list_empty(&open_stp->st_lockowners)) {
348 lock_sop = list_entry(open_stp->st_lockowners.next,
349 struct nfs4_stateowner, so_perstateid);
350 /* list_del(&open_stp->st_lockowners); */
351 BUG_ON(lock_sop->so_is_open_owner);
352 release_lockowner(lock_sop);
353 }
354}
355
356static void release_open_stateid(struct nfs4_stateid *stp)
357{
358 unhash_generic_stateid(stp);
359 release_stateid_lockowners(stp);
360 nfsd_close(stp->st_vfs_file);
361 free_generic_stateid(stp);
362}
363
364static void unhash_openowner(struct nfs4_stateowner *sop)
365{
366 struct nfs4_stateid *stp;
367
368 list_del(&sop->so_idhash);
369 list_del(&sop->so_strhash);
370 list_del(&sop->so_perclient);
371 list_del(&sop->so_perstateid); /* XXX: necessary? */
372 while (!list_empty(&sop->so_stateids)) {
373 stp = list_first_entry(&sop->so_stateids,
374 struct nfs4_stateid, st_perstateowner);
375 release_open_stateid(stp);
376 }
377}
378
379static void release_openowner(struct nfs4_stateowner *sop)
380{
381 unhash_openowner(sop);
382 list_del(&sop->so_close_lru);
383 nfs4_put_stateowner(sop);
384}
385
386static DEFINE_SPINLOCK(sessionid_lock);
387#define SESSION_HASH_SIZE 512
388static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
389
390static inline int
391hash_sessionid(struct nfs4_sessionid *sessionid)
392{
393 struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
394
395 return sid->sequence % SESSION_HASH_SIZE;
396}
397
398static inline void
399dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
400{
401 u32 *ptr = (u32 *)(&sessionid->data[0]);
402 dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
403}
404
405static void
406gen_sessionid(struct nfsd4_session *ses)
407{
408 struct nfs4_client *clp = ses->se_client;
409 struct nfsd4_sessionid *sid;
410
411 sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
412 sid->clientid = clp->cl_clientid;
413 sid->sequence = current_sessionid++;
414 sid->reserved = 0;
415}
416
417/*
418 * Give the client the number of slots it requests bound by
419 * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages.
420 *
421 * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we
422 * should (up to a point) re-negotiate active sessions and reduce their
423 * slot usage to make rooom for new connections. For now we just fail the
424 * create session.
425 */
426static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
427{
428 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
429
430 spin_lock(&nfsd_serv->sv_lock);
431 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
432 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
433 nfsd_serv->sv_drc_pages_used += np;
434 spin_unlock(&nfsd_serv->sv_lock);
435
436 if (np <= 0) {
437 status = nfserr_resource;
438 fchan->maxreqs = 0;
439 } else
440 fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;
441
442 return status;
443}
444
445/*
446 * fchan holds the client values on input, and the server values on output
447 */
448static int init_forechannel_attrs(struct svc_rqst *rqstp,
449 struct nfsd4_session *session,
450 struct nfsd4_channel_attrs *fchan)
451{
452 int status = 0;
453 __u32 maxcount = svc_max_payload(rqstp);
454
455 /* headerpadsz set to zero in encode routine */
456
457 /* Use the client's max request and max response size if possible */
458 if (fchan->maxreq_sz > maxcount)
459 fchan->maxreq_sz = maxcount;
460 session->se_fmaxreq_sz = fchan->maxreq_sz;
461
462 if (fchan->maxresp_sz > maxcount)
463 fchan->maxresp_sz = maxcount;
464 session->se_fmaxresp_sz = fchan->maxresp_sz;
465
466 /* Set the max response cached size our default which is
467 * a multiple of PAGE_SIZE and small */
468 session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
469 fchan->maxresp_cached = session->se_fmaxresp_cached;
470
471 /* Use the client's maxops if possible */
472 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
473 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
474 session->se_fmaxops = fchan->maxops;
475
476 /* try to use the client requested number of slots */
477 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
478 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
479
480 /* FIXME: Error means no more DRC pages so the server should
481 * recover pages from existing sessions. For now fail session
482 * creation.
483 */
484 status = set_forechannel_maxreqs(fchan);
485
486 session->se_fnumslots = fchan->maxreqs;
487 return status;
488}
489
490static int
491alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
492 struct nfsd4_create_session *cses)
493{
494 struct nfsd4_session *new, tmp;
495 int idx, status = nfserr_resource, slotsize;
496
497 memset(&tmp, 0, sizeof(tmp));
498
499 /* FIXME: For now, we just accept the client back channel attributes. */
500 status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel);
501 if (status)
502 goto out;
503
504 /* allocate struct nfsd4_session and slot table in one piece */
505 slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot);
506 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
507 if (!new)
508 goto out;
509
510 memcpy(new, &tmp, sizeof(*new));
511
512 new->se_client = clp;
513 gen_sessionid(new);
514 idx = hash_sessionid(&new->se_sessionid);
515 memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
516 NFS4_MAX_SESSIONID_LEN);
517
518 new->se_flags = cses->flags;
519 kref_init(&new->se_ref);
520 spin_lock(&sessionid_lock);
521 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
522 list_add(&new->se_perclnt, &clp->cl_sessions);
523 spin_unlock(&sessionid_lock);
524
525 status = nfs_ok;
526out:
527 return status;
528}
529
530/* caller must hold sessionid_lock */
531static struct nfsd4_session *
532find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
533{
534 struct nfsd4_session *elem;
535 int idx;
536
537 dump_sessionid(__func__, sessionid);
538 idx = hash_sessionid(sessionid);
539 dprintk("%s: idx is %d\n", __func__, idx);
540 /* Search in the appropriate list */
541 list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
542 dump_sessionid("list traversal", &elem->se_sessionid);
543 if (!memcmp(elem->se_sessionid.data, sessionid->data,
544 NFS4_MAX_SESSIONID_LEN)) {
545 return elem;
546 }
547 }
548
549 dprintk("%s: session not found\n", __func__);
550 return NULL;
551}
552
553/* caller must hold sessionid_lock */
554static void
555unhash_session(struct nfsd4_session *ses)
556{
557 list_del(&ses->se_hash);
558 list_del(&ses->se_perclnt);
559}
560
561static void
562release_session(struct nfsd4_session *ses)
563{
564 spin_lock(&sessionid_lock);
565 unhash_session(ses);
566 spin_unlock(&sessionid_lock);
567 nfsd4_put_session(ses);
568}
569
570static void nfsd4_release_respages(struct page **respages, short resused);
571
572void
573free_session(struct kref *kref)
574{
575 struct nfsd4_session *ses;
576 int i;
577
578 ses = container_of(kref, struct nfsd4_session, se_ref);
579 for (i = 0; i < ses->se_fnumslots; i++) {
580 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
581 nfsd4_release_respages(e->ce_respages, e->ce_resused);
582 }
583 kfree(ses->se_slots);
584 kfree(ses);
585}
586
314static inline void 587static inline void
315renew_client(struct nfs4_client *clp) 588renew_client(struct nfs4_client *clp)
316{ 589{
@@ -330,8 +603,8 @@ STALE_CLIENTID(clientid_t *clid)
330{ 603{
331 if (clid->cl_boot == boot_time) 604 if (clid->cl_boot == boot_time)
332 return 0; 605 return 0;
333 dprintk("NFSD stale clientid (%08x/%08x)\n", 606 dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
334 clid->cl_boot, clid->cl_id); 607 clid->cl_boot, clid->cl_id, boot_time);
335 return 1; 608 return 1;
336} 609}
337 610
@@ -376,6 +649,8 @@ static inline void
376free_client(struct nfs4_client *clp) 649free_client(struct nfs4_client *clp)
377{ 650{
378 shutdown_callback_client(clp); 651 shutdown_callback_client(clp);
652 nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages,
653 clp->cl_slot.sl_cache_entry.ce_resused);
379 if (clp->cl_cred.cr_group_info) 654 if (clp->cl_cred.cr_group_info)
380 put_group_info(clp->cl_cred.cr_group_info); 655 put_group_info(clp->cl_cred.cr_group_info);
381 kfree(clp->cl_principal); 656 kfree(clp->cl_principal);
@@ -420,7 +695,13 @@ expire_client(struct nfs4_client *clp)
420 list_del(&clp->cl_lru); 695 list_del(&clp->cl_lru);
421 while (!list_empty(&clp->cl_openowners)) { 696 while (!list_empty(&clp->cl_openowners)) {
422 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 697 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
423 release_stateowner(sop); 698 release_openowner(sop);
699 }
700 while (!list_empty(&clp->cl_sessions)) {
701 struct nfsd4_session *ses;
702 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
703 se_perclnt);
704 release_session(ses);
424 } 705 }
425 put_nfs4_client(clp); 706 put_nfs4_client(clp);
426} 707}
@@ -439,6 +720,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
439 INIT_LIST_HEAD(&clp->cl_strhash); 720 INIT_LIST_HEAD(&clp->cl_strhash);
440 INIT_LIST_HEAD(&clp->cl_openowners); 721 INIT_LIST_HEAD(&clp->cl_openowners);
441 INIT_LIST_HEAD(&clp->cl_delegations); 722 INIT_LIST_HEAD(&clp->cl_delegations);
723 INIT_LIST_HEAD(&clp->cl_sessions);
442 INIT_LIST_HEAD(&clp->cl_lru); 724 INIT_LIST_HEAD(&clp->cl_lru);
443 return clp; 725 return clp;
444} 726}
@@ -568,25 +850,45 @@ find_unconfirmed_client(clientid_t *clid)
568 return NULL; 850 return NULL;
569} 851}
570 852
853/*
854 * Return 1 iff clp's clientid establishment method matches the use_exchange_id
855 * parameter. Matching is based on the fact that at least one of the
856 * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1
857 *
858 * FIXME: we need to unify the clientid namespaces for nfsv4.x
859 * and correctly deal with client upgrade/downgrade in EXCHANGE_ID
860 * and SET_CLIENTID{,_CONFIRM}
861 */
/*
 * Compare how clp was established with what the caller expects.
 * A client is treated as established through EXCHANGE_ID when its
 * cl_exchange_flags is non-zero.  Returns non-zero when that matches
 * use_exchange_id, zero otherwise.
 */
862static inline int
863match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id)
864{
865 bool has_exchange_flags = (clp->cl_exchange_flags != 0);
866 return use_exchange_id == has_exchange_flags;
867}
868
571static struct nfs4_client * 869static struct nfs4_client *
572find_confirmed_client_by_str(const char *dname, unsigned int hashval) 870find_confirmed_client_by_str(const char *dname, unsigned int hashval,
871 bool use_exchange_id)
573{ 872{
574 struct nfs4_client *clp; 873 struct nfs4_client *clp;
575 874
576 list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { 875 list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
577 if (same_name(clp->cl_recdir, dname)) 876 if (same_name(clp->cl_recdir, dname) &&
877 match_clientid_establishment(clp, use_exchange_id))
578 return clp; 878 return clp;
579 } 879 }
580 return NULL; 880 return NULL;
581} 881}
582 882
583static struct nfs4_client * 883static struct nfs4_client *
584find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) 884find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
885 bool use_exchange_id)
585{ 886{
586 struct nfs4_client *clp; 887 struct nfs4_client *clp;
587 888
588 list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { 889 list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
589 if (same_name(clp->cl_recdir, dname)) 890 if (same_name(clp->cl_recdir, dname) &&
891 match_clientid_establishment(clp, use_exchange_id))
590 return clp; 892 return clp;
591 } 893 }
592 return NULL; 894 return NULL;
@@ -685,6 +987,534 @@ out_err:
685 return; 987 return;
686} 988}
687 989
/*
 * Remember statp in the compound result state (rq_resp->cstate.statp).
 * The reply cache code in this file later uses that pointer, relative to
 * the start of the first response page, to work out the length of the rpc
 * header and where the NFS reply data begins.
 */
990void
991nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
992{
993 struct nfsd4_compoundres *resp = rqstp->rq_resp;
994
995 resp->cstate.statp = statp;
996}
997
998/*
999 * Dereference the result pages.
1000 */
/*
 * For each of the first resused entries of respages: skip NULL entries,
 * otherwise put_page() the page and reset the entry to NULL, so that a
 * second call over the same array does not drop the page again.
 */
1001static void
1002nfsd4_release_respages(struct page **respages, short resused)
1003{
1004 int i;
1005
1006 dprintk("--> %s\n", __func__);
1007 for (i = 0; i < resused; i++) {
1008 if (!respages[i])
1009 continue;
1010 put_page(respages[i]);
1011 respages[i] = NULL;
1012 }
1013}
1014
/*
 * Copy count page pointers from frompages to topages, taking an extra
 * reference with get_page() on every non-NULL page copied.  NULL entries
 * are copied as NULL.  Whatever topages[i] held before is overwritten
 * without being released.
 */
1015static void
1016nfsd4_copy_pages(struct page **topages, struct page **frompages, short count)
1017{
1018 int i;
1019
1020 for (i = 0; i < count; i++) {
1021 topages[i] = frompages[i];
1022 if (!topages[i])
1023 continue;
1024 get_page(topages[i]);
1025 }
1026}
1027
1028/*
1029 * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous
1030 * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total
1031 * length of the XDR response is less than se_fmaxresp_cached
1032 * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages are used for part
1033 * of the reply (e.g. readdir).
1034 *
1035 * Store the base and length of the rq_res.head[0] page
1036 * of the NFSv4.1 data, just past the rpc header.
1037 */
/*
 * Save the reply of the current compound in the cache entry of the slot
 * recorded in resp->cstate.slot.
 *
 * Any pages previously held by the entry are released first.  The op count
 * and status are always recorded.  When nfsd4_not_cached(resp) is true no
 * pages are kept (ce_resused and ce_rpchdrlen are set to 0).  Otherwise at
 * most NFSD_PAGES_PER_SLOT + 1 response pages are referenced from the
 * entry, and ce_datav / ce_rpchdrlen describe the NFS data and the rpc
 * header length within the first response page.
 */
1038void
1039nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1040{
1041 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
1042 struct svc_rqst *rqstp = resp->rqstp;
1043 struct nfsd4_compoundargs *args = rqstp->rq_argp;
/*
 * NOTE(review): this indexes ops[] with opcnt itself.  If opcnt counts the
 * operations already processed, ops[opcnt] is the entry after the last
 * processed one rather than the SEQUENCE op tested below - confirm against
 * the compound processing loop, which is not visible here.
 */
1044 struct nfsd4_op *op = &args->ops[resp->opcnt];
1045 struct kvec *resv = &rqstp->rq_res.head[0];
1046
1047 dprintk("--> %s entry %p\n", __func__, entry);
1048
1049 /* Don't cache a failed OP_SEQUENCE. */
1050 if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
1051 return;
1052
1053 nfsd4_release_respages(entry->ce_respages, entry->ce_resused);
1054 entry->ce_opcnt = resp->opcnt;
1055 entry->ce_status = resp->cstate.status;
1056
1057 /*
1058 * Don't need a page to cache just the sequence operation - the slot
1059 * does this for us!
1060 */
1061
1062 if (nfsd4_not_cached(resp)) {
1063 entry->ce_resused = 0;
1064 entry->ce_rpchdrlen = 0;
1065 dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__,
1066 resp->cstate.slot->sl_cache_entry.ce_cachethis);
1067 return;
1068 }
/* Clamp the number of cached pages to NFSD_PAGES_PER_SLOT + 1. */
1069 entry->ce_resused = rqstp->rq_resused;
1070 if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
1071 entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
1072 nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
1073 entry->ce_resused);
/*
 * The cached data starts at statp; its length is the head length minus
 * the offset of statp from the start of the first response page.
 */
1074 entry->ce_datav.iov_base = resp->cstate.statp;
1075 entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
1076 (char *)page_address(rqstp->rq_respages[0]));
1077 /* Current request rpc header length*/
1078 entry->ce_rpchdrlen = (char *)resp->cstate.statp -
1079 (char *)page_address(rqstp->rq_respages[0]);
1080}
1081
1082/*
1083 * We keep the rpc header, but take the nfs reply from the replycache.
1084 */
/*
 * Returns 1 after copying the cached NFS data (entry->ce_datav) into the
 * response head right after the current rpc header and updating the head
 * length.  Returns 0, copying nothing, when the current header length plus
 * the cached data would exceed PAGE_SIZE.
 */
1085static int
1086nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
1087 struct nfsd4_cache_entry *entry)
1088{
1089 struct svc_rqst *rqstp = resp->rqstp;
1090 struct kvec *resv = &resp->rqstp->rq_res.head[0];
1091 int len;
1092
1093 /* Current request rpc header length*/
1094 len = (char *)resp->cstate.statp -
1095 (char *)page_address(rqstp->rq_respages[0]);
1096 if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
1097 dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
1098 entry->ce_datav.iov_len);
1099 return 0;
1100 }
1101 /* copy the cached reply nfsd data past the current rpc header */
1102 memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
1103 entry->ce_datav.iov_len);
1104 resv->iov_len = len + entry->ce_datav.iov_len;
1105 return 1;
1106}
1107
1108/*
1109 * Keep the first page of the replay. Copy the NFSv4.1 data from the first
1110 * cached page. Replace any further replay pages from the cache.
1111 */
/*
 * Rebuild the response for a replayed request from the cache entry of
 * resp->cstate.slot.
 *
 * seq may be NULL (nfsd4_create_session passes NULL); in that case the
 * sequence-only shortcut below is never taken.
 *
 * Returns nfs_ok for the sequence-only shortcut, otherwise the status
 * stored in the cache entry (ce_status).
 */
1112__be32
1113nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1114 struct nfsd4_sequence *seq)
1115{
1116 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
1117 __be32 status;
1118
1119 dprintk("--> %s entry %p\n", __func__, entry);
1120
1121 /*
1122 * If this is just the sequence operation, we did not keep
1123 * a page in the cache entry because we can just use the
1124 * slot info stored in struct nfsd4_sequence that was checked
1125 * against the slot in nfsd4_sequence().
1126 *
1127 * This occurs when seq->cachethis is FALSE, or when the client
1128 * session inactivity timer fires and a solo sequence operation
1129 * is sent (lease renewal).
1130 */
1131 if (seq && nfsd4_not_cached(resp)) {
1132 seq->maxslots = resp->cstate.session->se_fnumslots;
1133 return nfs_ok;
1134 }
1135
1136 if (!nfsd41_copy_replay_data(resp, entry)) {
1137 /*
1138 * Not enough room to use the replay rpc header, send the
1139 * cached header. Release all the allocated result pages.
1140 */
1141 svc_free_res_pages(resp->rqstp);
1142 nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
1143 entry->ce_resused);
1144 } else {
1145 /* Release all but the first allocated result page */
1146
/*
 * NOTE(review): rq_resused is decremented before svc_free_res_pages();
 * presumably that is what spares one page from being freed - confirm
 * against svc_free_res_pages(), which is not visible here.
 */
1147 resp->rqstp->rq_resused--;
1148 svc_free_res_pages(resp->rqstp);
1149
1150 nfsd4_copy_pages(&resp->rqstp->rq_respages[1],
1151 &entry->ce_respages[1],
1152 entry->ce_resused - 1);
1153 }
1154
/* Restore page count, op count, total length and status from the cache. */
1155 resp->rqstp->rq_resused = entry->ce_resused;
1156 resp->opcnt = entry->ce_opcnt;
1157 resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen;
1158 status = entry->ce_status;
1159
1160 return status;
1161}
1162
1163/*
1164 * Set the exchange_id flags returned by the server.
1165 */
/*
 * ORs EXCHGID4_FLAG_USE_NON_PNFS and EXCHGID4_FLAG_SUPP_MOVED_REFER into
 * new->cl_exchange_flags, then overwrites clid->flags with the result.
 * As a consequence cl_exchange_flags is always non-zero after this call.
 */
1166static void
1167nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
1168{
1169 /* pNFS is not supported */
1170 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
1171
1172 /* Referrals are supported, Migration is not. */
1173 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
1174
1175 /* set the wire flags to return to client. */
1176 clid->flags = new->cl_exchange_flags;
1177}
1178
/*
 * EXCHANGE_ID operation handler.
 *
 * Outline of what the code below does:
 *  - rejects a bad client name or flags outside EXCHGID4_FLAG_MASK_A with
 *    nfserr_inval;
 *  - accepts only SP4_NONE state protection;
 *  - derives the recovery directory name and its hash from the client name;
 *  - under nfs4_lock_state(), looks for a confirmed and then an unconfirmed
 *    client with that name that was established through EXCHANGE_ID, and
 *    either reuses the confirmed record, reports an error, or creates a
 *    new unconfirmed client;
 *  - fills in exid->clientid, exid->seqid and the returned flags.
 *
 * The "18.35.4 case N" comments are from the original author and refer to
 * the protocol specification section on EXCHANGE_ID.
 *
 * NOTE(review): status is declared int while the function returns __be32
 * and stores nfserr_* values in it.
 */
1179__be32
1180nfsd4_exchange_id(struct svc_rqst *rqstp,
1181 struct nfsd4_compound_state *cstate,
1182 struct nfsd4_exchange_id *exid)
1183{
1184 struct nfs4_client *unconf, *conf, *new;
1185 int status;
1186 unsigned int strhashval;
1187 char dname[HEXDIR_LEN];
1188 nfs4_verifier verf = exid->verifier;
1189 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
1190
1191 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
1192 " ip_addr=%u flags %x, spa_how %d\n",
1193 __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1194 ip_addr, exid->flags, exid->spa_how);
1195
1196 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
1197 return nfserr_inval;
1198
1199 /* Currently only support SP4_NONE */
1200 switch (exid->spa_how) {
1201 case SP4_NONE:
1202 break;
1203 case SP4_SSV:
1204 return nfserr_encr_alg_unsupp;
/*
 * There is no break after BUG(): textually the default label runs on into
 * the SP4_MACH_CRED case below.
 */
1205 default:
1206 BUG(); /* checked by xdr code */
1207 case SP4_MACH_CRED:
1208 return nfserr_serverfault; /* no excuse :-/ */
1209 }
1210
1211 status = nfs4_make_rec_clidname(dname, &exid->clname);
1212
1213 if (status)
1214 goto error;
1215
1216 strhashval = clientstr_hashval(dname);
1217
1218 nfs4_lock_state();
1219 status = nfs_ok;
1220
/* true: only match clients that were established through EXCHANGE_ID */
1221 conf = find_confirmed_client_by_str(dname, strhashval, true);
1222 if (conf) {
1223 if (!same_verf(&verf, &conf->cl_verifier)) {
1224 /* 18.35.4 case 8 */
1225 if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1226 status = nfserr_not_same;
1227 goto out;
1228 }
1229 /* Client reboot: destroy old state */
1230 expire_client(conf);
1231 goto out_new;
1232 }
1233 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
1234 /* 18.35.4 case 9 */
1235 if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1236 status = nfserr_perm;
1237 goto out;
1238 }
1239 expire_client(conf);
1240 goto out_new;
1241 }
1242 if (ip_addr != conf->cl_addr &&
1243 !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) {
1244 /* Client collision. 18.35.4 case 3 */
1245 status = nfserr_clid_inuse;
1246 goto out;
1247 }
1248 /*
1249 * Set bit when the owner id and verifier map to an already
1250 * confirmed client id (18.35.3).
1251 */
1252 exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
1253
1254 /*
1255 * Falling into 18.35.4 case 2, possible router replay.
1256 * Leave confirmed record intact and return same result.
1257 */
1258 copy_verf(conf, &verf);
1259 new = conf;
1260 goto out_copy;
1261 } else {
1262 /* 18.35.4 case 7 */
1263 if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1264 status = nfserr_noent;
1265 goto out;
1266 }
1267 }
1268
1269 unconf = find_unconfirmed_client_by_str(dname, strhashval, true);
1270 if (unconf) {
1271 /*
1272 * Possible retry or client restart. Per 18.35.4 case 4,
1273 * a new unconfirmed record should be generated regardless
1274 * of whether any properties have changed.
1275 */
1276 expire_client(unconf);
1277 }
1278
1279out_new:
1280 /* Normal case */
1281 new = create_client(exid->clname, dname);
1282 if (new == NULL) {
1283 status = nfserr_resource;
1284 goto out;
1285 }
1286
1287 copy_verf(new, &verf);
1288 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1289 new->cl_addr = ip_addr;
1290 gen_clid(new);
1291 gen_confirm(new);
1292 add_to_unconfirmed(new, strhashval);
/*
 * Reached both for a freshly created client and, through the goto above,
 * with new pointing at an existing confirmed client.  In both cases the
 * client slot seqid is reset to 0 below.
 */
1293out_copy:
1294 exid->clientid.cl_boot = new->cl_clientid.cl_boot;
1295 exid->clientid.cl_id = new->cl_clientid.cl_id;
1296
1297 new->cl_slot.sl_seqid = 0;
1298 exid->seqid = 1;
1299 nfsd4_set_ex_flags(new, exid);
1300
1301 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1302 new->cl_slot.sl_seqid, new->cl_exchange_flags);
1303 status = nfs_ok;
1304
1305out:
1306 nfs4_unlock_state();
1307error:
1308 dprintk("nfsd4_exchange_id returns %d\n", ntohl(status));
1309 return status;
1310}
1311
/*
 * Classify an incoming sequence id against the state of a slot.
 *
 * Returns:
 *   nfserr_jukebox         slot is in use and seqid equals the slot seqid
 *   nfserr_seq_misordered  slot is in use with any other seqid, or no other
 *                          rule matched
 *   nfs_ok                 seqid is slot seqid + 1, or seqid is 1 and
 *                          slot seqid + 1 evaluates to 0 (wraparound)
 *   nfserr_replay_cache    seqid equals the slot seqid (slot not in use)
 *
 * NOTE(review): the return type is int although the values are nfserr_*
 * codes that callers hand back as __be32.
 */
1312static int
1313check_slot_seqid(u32 seqid, struct nfsd4_slot *slot)
1314{
1315 dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid,
1316 slot->sl_seqid);
1317
1318 /* The slot is in use, and no response has been sent. */
1319 if (slot->sl_inuse) {
1320 if (seqid == slot->sl_seqid)
1321 return nfserr_jukebox;
1322 else
1323 return nfserr_seq_misordered;
1324 }
1325 /* Normal */
1326 if (likely(seqid == slot->sl_seqid + 1))
1327 return nfs_ok;
1328 /* Replay */
1329 if (seqid == slot->sl_seqid)
1330 return nfserr_replay_cache;
1331 /* Wraparound */
1332 if (seqid == 1 && (slot->sl_seqid + 1) == 0)
1333 return nfs_ok;
1334 /* Misordered replay or misordered new request */
1335 return nfserr_seq_misordered;
1336}
1337
/*
 * CREATE_SESSION operation handler.
 *
 * Runs under nfs4_lock_state().  The client id in the request is looked up
 * in both the unconfirmed and the confirmed tables:
 *  - confirmed client: the request seqid is checked against the client
 *    slot; a replay is answered from the cached reply, anything other than
 *    slot seqid + 1 is nfserr_seq_misordered, otherwise the slot seqid is
 *    incremented;
 *  - unconfirmed client: credentials and address must match, any non-zero
 *    result from check_slot_seqid() is reported as nfserr_seq_misordered,
 *    and on success the client is moved to the confirmed table;
 *  - neither: nfserr_stale_clientid.
 * A session is then allocated with alloc_init_session() and the reply
 * fields (session id, seqid) are filled in.
 *
 * NOTE(review): status is declared int while the function returns __be32.
 */
1338__be32
1339nfsd4_create_session(struct svc_rqst *rqstp,
1340 struct nfsd4_compound_state *cstate,
1341 struct nfsd4_create_session *cr_ses)
1342{
1343 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
1344 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1345 struct nfs4_client *conf, *unconf;
1346 struct nfsd4_slot *slot = NULL;
1347 int status = 0;
1348
1349 nfs4_lock_state();
1350 unconf = find_unconfirmed_client(&cr_ses->clientid);
1351 conf = find_confirmed_client(&cr_ses->clientid);
1352
1353 if (conf) {
1354 slot = &conf->cl_slot;
1355 status = check_slot_seqid(cr_ses->seqid, slot);
1356 if (status == nfserr_replay_cache) {
1357 dprintk("Got a create_session replay! seqid= %d\n",
1358 slot->sl_seqid);
1359 cstate->slot = slot;
1360 cstate->status = status;
1361 /* Return the cached reply status */
1362 status = nfsd4_replay_cache_entry(resp, NULL);
1363 goto out;
1364 } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) {
1365 status = nfserr_seq_misordered;
1366 dprintk("Sequence misordered!\n");
1367 dprintk("Expected seqid= %d but got seqid= %d\n",
1368 slot->sl_seqid, cr_ses->seqid);
1369 goto out;
1370 }
1371 conf->cl_slot.sl_seqid++;
1372 } else if (unconf) {
1373 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1374 (ip_addr != unconf->cl_addr)) {
1375 status = nfserr_clid_inuse;
1376 goto out;
1377 }
1378
1379 slot = &unconf->cl_slot;
1380 status = check_slot_seqid(cr_ses->seqid, slot);
1381 if (status) {
1382 /* an unconfirmed replay returns misordered */
1383 status = nfserr_seq_misordered;
1384 goto out;
1385 }
1386
1387 slot->sl_seqid++; /* from 0 to 1 */
1388 move_to_confirmed(unconf);
1389
1390 /*
1391 * We do not support RDMA or persistent sessions
1392 */
/* Note: these flags are cleared only on this (unconfirmed) path. */
1393 cr_ses->flags &= ~SESSION4_PERSIST;
1394 cr_ses->flags &= ~SESSION4_RDMA;
1395
1396 conf = unconf;
1397 } else {
1398 status = nfserr_stale_clientid;
1399 goto out;
1400 }
1401
1402 status = alloc_init_session(rqstp, conf, cr_ses);
1403 if (status)
1404 goto out;
1405
/*
 * The session id returned to the client is read from conf->cl_sessionid;
 * presumably alloc_init_session() stores it there - confirm, it is not
 * visible here.
 */
1406 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
1407 NFS4_MAX_SESSIONID_LEN);
1408 cr_ses->seqid = slot->sl_seqid;
1409
1410 slot->sl_inuse = true;
1411 cstate->slot = slot;
1412 /* Ensure a page is used for the cache */
1413 slot->sl_cache_entry.ce_cachethis = 1;
1414out:
1415 nfs4_unlock_state();
1416 dprintk("%s returns %d\n", __func__, ntohl(status));
1417 return status;
1418}
1419
/*
 * DESTROY_SESSION operation handler.
 *
 * Looks the session up by id under sessionid_lock.  When it is not found
 * the result is nfserr_badsession.  Otherwise the session is unhashed
 * while the lock is still held; after the lock is dropped the callback
 * client of the owning nfs4_client is shut down and one session reference
 * is put.  Returns nfs_ok in that case.
 *
 * NOTE(review): status is declared u32 while the function returns __be32.
 */
1420__be32
1421nfsd4_destroy_session(struct svc_rqst *r,
1422 struct nfsd4_compound_state *cstate,
1423 struct nfsd4_destroy_session *sessionid)
1424{
1425 struct nfsd4_session *ses;
1426 u32 status = nfserr_badsession;
1427
1428 /* Notes:
1429 * - The confirmed nfs4_client->cl_sessionid holds destroyed sessionid
1430 * - Should we return nfserr_back_chan_busy if waiting for
1431 * callbacks on to-be-destroyed session?
1432 * - Do we need to clear any callback info from previous session?
1433 */
1434
1435 dump_sessionid(__func__, &sessionid->sessionid);
1436 spin_lock(&sessionid_lock);
1437 ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
1438 if (!ses) {
1439 spin_unlock(&sessionid_lock);
1440 goto out;
1441 }
1442
1443 unhash_session(ses);
1444 spin_unlock(&sessionid_lock);
1445
1446 /* wait for callbacks */
1447 shutdown_callback_client(ses->se_client);
1448 nfsd4_put_session(ses);
1449 status = nfs_ok;
1450out:
1451 dprintk("%s returns %d\n", __func__, ntohl(status));
1452 return status;
1453}
1454
/*
 * SEQUENCE operation handler.
 *
 * SEQUENCE must be the first operation of the compound (resp->opcnt == 1),
 * otherwise nfserr_sequence_pos is returned.  Everything after that runs
 * under sessionid_lock:
 *  - unknown session id: nfserr_badsession;
 *  - slot id not below se_fnumslots: nfserr_badslot;
 *  - replay (check_slot_seqid() reports nfserr_replay_cache): the reply is
 *    rebuilt from the slot cache and cstate->status is set to
 *    nfserr_replay_cache;
 *  - any other non-zero result from check_slot_seqid() is returned as is;
 *  - otherwise the slot is marked in use and its seqid updated.
 * On success and on replay the client lease is renewed and a session
 * reference is taken; the error paths jump straight to out and take no
 * reference.
 *
 * NOTE(review): status is declared int while the function returns __be32.
 */
1455__be32
1456nfsd4_sequence(struct svc_rqst *rqstp,
1457 struct nfsd4_compound_state *cstate,
1458 struct nfsd4_sequence *seq)
1459{
1460 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1461 struct nfsd4_session *session;
1462 struct nfsd4_slot *slot;
1463 int status;
1464
1465 if (resp->opcnt != 1)
1466 return nfserr_sequence_pos;
1467
1468 spin_lock(&sessionid_lock);
1469 status = nfserr_badsession;
1470 session = find_in_sessionid_hashtbl(&seq->sessionid);
1471 if (!session)
1472 goto out;
1473
1474 status = nfserr_badslot;
1475 if (seq->slotid >= session->se_fnumslots)
1476 goto out;
1477
1478 slot = &session->se_slots[seq->slotid];
1479 dprintk("%s: slotid %d\n", __func__, seq->slotid);
1480
1481 status = check_slot_seqid(seq->seqid, slot);
1482 if (status == nfserr_replay_cache) {
1483 cstate->slot = slot;
1484 cstate->session = session;
1485 /* Return the cached reply status and set cstate->status
1486 * for nfsd4_svc_encode_compoundres processing */
1487 status = nfsd4_replay_cache_entry(resp, seq);
1488 cstate->status = nfserr_replay_cache;
1489 goto replay_cache;
1490 }
1491 if (status)
1492 goto out;
1493
1494 /* Success! bump slot seqid */
1495 slot->sl_inuse = true;
1496 slot->sl_seqid = seq->seqid;
1497 slot->sl_cache_entry.ce_cachethis = seq->cachethis;
1498 /* Always set the cache entry cachethis for solo sequence */
1499 if (nfsd4_is_solo_sequence(resp))
1500 slot->sl_cache_entry.ce_cachethis = 1;
1501
1502 cstate->slot = slot;
1503 cstate->session = session;
1504
1505replay_cache:
1506 /* Renew the clientid on success and on replay.
1507 * Hold a session reference until done processing the compound:
1508 * nfsd4_put_session called only if the cstate slot is set.
1509 */
1510 renew_client(session->se_client);
1511 nfsd4_get_session(session);
1512out:
1513 spin_unlock(&sessionid_lock);
1514 dprintk("%s: return %d\n", __func__, ntohl(status));
1515 return status;
1516}
1517
688__be32 1518__be32
689nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1519nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
690 struct nfsd4_setclientid *setclid) 1520 struct nfsd4_setclientid *setclid)
@@ -716,14 +1546,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
716 strhashval = clientstr_hashval(dname); 1546 strhashval = clientstr_hashval(dname);
717 1547
718 nfs4_lock_state(); 1548 nfs4_lock_state();
719 conf = find_confirmed_client_by_str(dname, strhashval); 1549 conf = find_confirmed_client_by_str(dname, strhashval, false);
720 if (conf) { 1550 if (conf) {
721 /* RFC 3530 14.2.33 CASE 0: */ 1551 /* RFC 3530 14.2.33 CASE 0: */
722 status = nfserr_clid_inuse; 1552 status = nfserr_clid_inuse;
723 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) 1553 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
724 || conf->cl_addr != sin->sin_addr.s_addr) { 1554 dprintk("NFSD: setclientid: string in use by client"
725 dprintk("NFSD: setclientid: string in use by clientat %pI4\n", 1555 " at %pI4\n", &conf->cl_addr);
726 &conf->cl_addr);
727 goto out; 1556 goto out;
728 } 1557 }
729 } 1558 }
@@ -732,7 +1561,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
732 * has a description of SETCLIENTID request processing consisting 1561 * has a description of SETCLIENTID request processing consisting
733 * of 5 bullet points, labeled as CASE0 - CASE4 below. 1562 * of 5 bullet points, labeled as CASE0 - CASE4 below.
734 */ 1563 */
735 unconf = find_unconfirmed_client_by_str(dname, strhashval); 1564 unconf = find_unconfirmed_client_by_str(dname, strhashval, false);
736 status = nfserr_resource; 1565 status = nfserr_resource;
737 if (!conf) { 1566 if (!conf) {
738 /* 1567 /*
@@ -887,7 +1716,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
887 unsigned int hash = 1716 unsigned int hash =
888 clientstr_hashval(unconf->cl_recdir); 1717 clientstr_hashval(unconf->cl_recdir);
889 conf = find_confirmed_client_by_str(unconf->cl_recdir, 1718 conf = find_confirmed_client_by_str(unconf->cl_recdir,
890 hash); 1719 hash, false);
891 if (conf) { 1720 if (conf) {
892 nfsd4_remove_clid_dir(conf); 1721 nfsd4_remove_clid_dir(conf);
893 expire_client(conf); 1722 expire_client(conf);
@@ -923,11 +1752,13 @@ alloc_init_file(struct inode *ino)
923 1752
924 fp = kmem_cache_alloc(file_slab, GFP_KERNEL); 1753 fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
925 if (fp) { 1754 if (fp) {
926 kref_init(&fp->fi_ref); 1755 atomic_set(&fp->fi_ref, 1);
927 INIT_LIST_HEAD(&fp->fi_hash); 1756 INIT_LIST_HEAD(&fp->fi_hash);
928 INIT_LIST_HEAD(&fp->fi_stateids); 1757 INIT_LIST_HEAD(&fp->fi_stateids);
929 INIT_LIST_HEAD(&fp->fi_delegations); 1758 INIT_LIST_HEAD(&fp->fi_delegations);
1759 spin_lock(&recall_lock);
930 list_add(&fp->fi_hash, &file_hashtbl[hashval]); 1760 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1761 spin_unlock(&recall_lock);
931 fp->fi_inode = igrab(ino); 1762 fp->fi_inode = igrab(ino);
932 fp->fi_id = current_fileid++; 1763 fp->fi_id = current_fileid++;
933 fp->fi_had_conflict = false; 1764 fp->fi_had_conflict = false;
@@ -1037,48 +1868,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
1037 return sop; 1868 return sop;
1038} 1869}
1039 1870
1040static void
1041release_stateid_lockowners(struct nfs4_stateid *open_stp)
1042{
1043 struct nfs4_stateowner *lock_sop;
1044
1045 while (!list_empty(&open_stp->st_lockowners)) {
1046 lock_sop = list_entry(open_stp->st_lockowners.next,
1047 struct nfs4_stateowner, so_perstateid);
1048 /* list_del(&open_stp->st_lockowners); */
1049 BUG_ON(lock_sop->so_is_open_owner);
1050 release_stateowner(lock_sop);
1051 }
1052}
1053
1054static void
1055unhash_stateowner(struct nfs4_stateowner *sop)
1056{
1057 struct nfs4_stateid *stp;
1058
1059 list_del(&sop->so_idhash);
1060 list_del(&sop->so_strhash);
1061 if (sop->so_is_open_owner)
1062 list_del(&sop->so_perclient);
1063 list_del(&sop->so_perstateid);
1064 while (!list_empty(&sop->so_stateids)) {
1065 stp = list_entry(sop->so_stateids.next,
1066 struct nfs4_stateid, st_perstateowner);
1067 if (sop->so_is_open_owner)
1068 release_stateid(stp, OPEN_STATE);
1069 else
1070 release_stateid(stp, LOCK_STATE);
1071 }
1072}
1073
1074static void
1075release_stateowner(struct nfs4_stateowner *sop)
1076{
1077 unhash_stateowner(sop);
1078 list_del(&sop->so_close_lru);
1079 nfs4_put_stateowner(sop);
1080}
1081
1082static inline void 1871static inline void
1083init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 1872init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
1084 struct nfs4_stateowner *sop = open->op_stateowner; 1873 struct nfs4_stateowner *sop = open->op_stateowner;
@@ -1100,30 +1889,13 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1100 stp->st_stateid.si_generation = 0; 1889 stp->st_stateid.si_generation = 0;
1101 stp->st_access_bmap = 0; 1890 stp->st_access_bmap = 0;
1102 stp->st_deny_bmap = 0; 1891 stp->st_deny_bmap = 0;
1103 __set_bit(open->op_share_access, &stp->st_access_bmap); 1892 __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK,
1893 &stp->st_access_bmap);
1104 __set_bit(open->op_share_deny, &stp->st_deny_bmap); 1894 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
1105 stp->st_openstp = NULL; 1895 stp->st_openstp = NULL;
1106} 1896}
1107 1897
1108static void 1898static void
1109release_stateid(struct nfs4_stateid *stp, int flags)
1110{
1111 struct file *filp = stp->st_vfs_file;
1112
1113 list_del(&stp->st_hash);
1114 list_del(&stp->st_perfile);
1115 list_del(&stp->st_perstateowner);
1116 if (flags & OPEN_STATE) {
1117 release_stateid_lockowners(stp);
1118 stp->st_vfs_file = NULL;
1119 nfsd_close(filp);
1120 } else if (flags & LOCK_STATE)
1121 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
1122 put_nfs4_file(stp->st_file);
1123 kmem_cache_free(stateid_slab, stp);
1124}
1125
1126static void
1127move_to_close_lru(struct nfs4_stateowner *sop) 1899move_to_close_lru(struct nfs4_stateowner *sop)
1128{ 1900{
1129 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1901 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1160,20 +1932,33 @@ find_file(struct inode *ino)
1160 unsigned int hashval = file_hashval(ino); 1932 unsigned int hashval = file_hashval(ino);
1161 struct nfs4_file *fp; 1933 struct nfs4_file *fp;
1162 1934
1935 spin_lock(&recall_lock);
1163 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { 1936 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
1164 if (fp->fi_inode == ino) { 1937 if (fp->fi_inode == ino) {
1165 get_nfs4_file(fp); 1938 get_nfs4_file(fp);
1939 spin_unlock(&recall_lock);
1166 return fp; 1940 return fp;
1167 } 1941 }
1168 } 1942 }
1943 spin_unlock(&recall_lock);
1169 return NULL; 1944 return NULL;
1170} 1945}
1171 1946
1172static inline int access_valid(u32 x) 1947static inline int access_valid(u32 x, u32 minorversion)
1173{ 1948{
1174 if (x < NFS4_SHARE_ACCESS_READ) 1949 if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
1175 return 0; 1950 return 0;
1176 if (x > NFS4_SHARE_ACCESS_BOTH) 1951 if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH)
1952 return 0;
1953 x &= ~NFS4_SHARE_ACCESS_MASK;
1954 if (minorversion && x) {
1955 if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL)
1956 return 0;
1957 if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED)
1958 return 0;
1959 x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK);
1960 }
1961 if (x)
1177 return 0; 1962 return 0;
1178 return 1; 1963 return 1;
1179} 1964}
@@ -1409,7 +2194,8 @@ static struct lock_manager_operations nfsd_lease_mng_ops = {
1409 2194
1410 2195
1411__be32 2196__be32
1412nfsd4_process_open1(struct nfsd4_open *open) 2197nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2198 struct nfsd4_open *open)
1413{ 2199{
1414 clientid_t *clientid = &open->op_clientid; 2200 clientid_t *clientid = &open->op_clientid;
1415 struct nfs4_client *clp = NULL; 2201 struct nfs4_client *clp = NULL;
@@ -1432,10 +2218,13 @@ nfsd4_process_open1(struct nfsd4_open *open)
1432 return nfserr_expired; 2218 return nfserr_expired;
1433 goto renew; 2219 goto renew;
1434 } 2220 }
2221 /* When sessions are used, skip open sequenceid processing */
2222 if (nfsd4_has_session(cstate))
2223 goto renew;
1435 if (!sop->so_confirmed) { 2224 if (!sop->so_confirmed) {
1436 /* Replace unconfirmed owners without checking for replay. */ 2225 /* Replace unconfirmed owners without checking for replay. */
1437 clp = sop->so_client; 2226 clp = sop->so_client;
1438 release_stateowner(sop); 2227 release_openowner(sop);
1439 open->op_stateowner = NULL; 2228 open->op_stateowner = NULL;
1440 goto renew; 2229 goto renew;
1441 } 2230 }
@@ -1709,6 +2498,7 @@ out:
1709__be32 2498__be32
1710nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 2499nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
1711{ 2500{
2501 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1712 struct nfs4_file *fp = NULL; 2502 struct nfs4_file *fp = NULL;
1713 struct inode *ino = current_fh->fh_dentry->d_inode; 2503 struct inode *ino = current_fh->fh_dentry->d_inode;
1714 struct nfs4_stateid *stp = NULL; 2504 struct nfs4_stateid *stp = NULL;
@@ -1716,7 +2506,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1716 __be32 status; 2506 __be32 status;
1717 2507
1718 status = nfserr_inval; 2508 status = nfserr_inval;
1719 if (!access_valid(open->op_share_access) 2509 if (!access_valid(open->op_share_access, resp->cstate.minorversion)
1720 || !deny_valid(open->op_share_deny)) 2510 || !deny_valid(open->op_share_deny))
1721 goto out; 2511 goto out;
1722 /* 2512 /*
@@ -1764,12 +2554,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1764 init_stateid(stp, fp, open); 2554 init_stateid(stp, fp, open);
1765 status = nfsd4_truncate(rqstp, current_fh, open); 2555 status = nfsd4_truncate(rqstp, current_fh, open);
1766 if (status) { 2556 if (status) {
1767 release_stateid(stp, OPEN_STATE); 2557 release_open_stateid(stp);
1768 goto out; 2558 goto out;
1769 } 2559 }
2560 if (nfsd4_has_session(&resp->cstate))
2561 update_stateid(&stp->st_stateid);
1770 } 2562 }
1771 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); 2563 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
1772 2564
2565 if (nfsd4_has_session(&resp->cstate))
2566 open->op_stateowner->so_confirmed = 1;
2567
1773 /* 2568 /*
1774 * Attempt to hand out a delegation. No error return, because the 2569 * Attempt to hand out a delegation. No error return, because the
1775 * OPEN succeeds even if we fail. 2570 * OPEN succeeds even if we fail.
@@ -1790,7 +2585,8 @@ out:
1790 * To finish the open response, we just need to set the rflags. 2585 * To finish the open response, we just need to set the rflags.
1791 */ 2586 */
1792 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; 2587 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
1793 if (!open->op_stateowner->so_confirmed) 2588 if (!open->op_stateowner->so_confirmed &&
2589 !nfsd4_has_session(&resp->cstate))
1794 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; 2590 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
1795 2591
1796 return status; 2592 return status;
@@ -1898,7 +2694,7 @@ nfs4_laundromat(void)
1898 } 2694 }
1899 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 2695 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
1900 sop->so_id); 2696 sop->so_id);
1901 release_stateowner(sop); 2697 release_openowner(sop);
1902 } 2698 }
1903 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 2699 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
1904 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 2700 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -1983,10 +2779,7 @@ out:
1983static inline __be32 2779static inline __be32
1984check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) 2780check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
1985{ 2781{
1986 /* Trying to call delegreturn with a special stateid? Yuch: */ 2782 if (ONE_STATEID(stateid) && (flags & RD_STATE))
1987 if (!(flags & (RD_STATE | WR_STATE)))
1988 return nfserr_bad_stateid;
1989 else if (ONE_STATEID(stateid) && (flags & RD_STATE))
1990 return nfs_ok; 2783 return nfs_ok;
1991 else if (locks_in_grace()) { 2784 else if (locks_in_grace()) {
1992 /* Answer in remaining cases depends on existance of 2785 /* Answer in remaining cases depends on existance of
@@ -2005,14 +2798,20 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
2005 * that are not able to provide mandatory locking. 2798 * that are not able to provide mandatory locking.
2006 */ 2799 */
2007static inline int 2800static inline int
2008io_during_grace_disallowed(struct inode *inode, int flags) 2801grace_disallows_io(struct inode *inode)
2009{ 2802{
2010 return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) 2803 return locks_in_grace() && mandatory_lock(inode);
2011 && mandatory_lock(inode);
2012} 2804}
2013 2805
2014static int check_stateid_generation(stateid_t *in, stateid_t *ref) 2806static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags)
2015{ 2807{
2808 /*
2809 * When sessions are used the stateid generation number is ignored
2810 * when it is zero.
2811 */
2812 if ((flags & HAS_SESSION) && in->si_generation == 0)
2813 goto out;
2814
2016 /* If the client sends us a stateid from the future, it's buggy: */ 2815 /* If the client sends us a stateid from the future, it's buggy: */
2017 if (in->si_generation > ref->si_generation) 2816 if (in->si_generation > ref->si_generation)
2018 return nfserr_bad_stateid; 2817 return nfserr_bad_stateid;
@@ -2028,74 +2827,77 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref)
2028 */ 2827 */
2029 if (in->si_generation < ref->si_generation) 2828 if (in->si_generation < ref->si_generation)
2030 return nfserr_old_stateid; 2829 return nfserr_old_stateid;
2830out:
2031 return nfs_ok; 2831 return nfs_ok;
2032} 2832}
2033 2833
2834static int is_delegation_stateid(stateid_t *stateid)
2835{
2836 return stateid->si_fileid == 0;
2837}
2838
2034/* 2839/*
2035* Checks for stateid operations 2840* Checks for stateid operations
2036*/ 2841*/
2037__be32 2842__be32
2038nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) 2843nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2844 stateid_t *stateid, int flags, struct file **filpp)
2039{ 2845{
2040 struct nfs4_stateid *stp = NULL; 2846 struct nfs4_stateid *stp = NULL;
2041 struct nfs4_delegation *dp = NULL; 2847 struct nfs4_delegation *dp = NULL;
2042 stateid_t *stidp; 2848 struct svc_fh *current_fh = &cstate->current_fh;
2043 struct inode *ino = current_fh->fh_dentry->d_inode; 2849 struct inode *ino = current_fh->fh_dentry->d_inode;
2044 __be32 status; 2850 __be32 status;
2045 2851
2046 dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
2047 stateid->si_boot, stateid->si_stateownerid,
2048 stateid->si_fileid, stateid->si_generation);
2049 if (filpp) 2852 if (filpp)
2050 *filpp = NULL; 2853 *filpp = NULL;
2051 2854
2052 if (io_during_grace_disallowed(ino, flags)) 2855 if (grace_disallows_io(ino))
2053 return nfserr_grace; 2856 return nfserr_grace;
2054 2857
2858 if (nfsd4_has_session(cstate))
2859 flags |= HAS_SESSION;
2860
2055 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 2861 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
2056 return check_special_stateids(current_fh, stateid, flags); 2862 return check_special_stateids(current_fh, stateid, flags);
2057 2863
2058 /* STALE STATEID */
2059 status = nfserr_stale_stateid; 2864 status = nfserr_stale_stateid;
2060 if (STALE_STATEID(stateid)) 2865 if (STALE_STATEID(stateid))
2061 goto out; 2866 goto out;
2062 2867
2063 /* BAD STATEID */
2064 status = nfserr_bad_stateid; 2868 status = nfserr_bad_stateid;
2065 if (!stateid->si_fileid) { /* delegation stateid */ 2869 if (is_delegation_stateid(stateid)) {
2066 if(!(dp = find_delegation_stateid(ino, stateid))) { 2870 dp = find_delegation_stateid(ino, stateid);
2067 dprintk("NFSD: delegation stateid not found\n"); 2871 if (!dp)
2068 goto out; 2872 goto out;
2069 } 2873 status = check_stateid_generation(stateid, &dp->dl_stateid,
2070 stidp = &dp->dl_stateid; 2874 flags);
2875 if (status)
2876 goto out;
2877 status = nfs4_check_delegmode(dp, flags);
2878 if (status)
2879 goto out;
2880 renew_client(dp->dl_client);
2881 if (filpp)
2882 *filpp = dp->dl_vfs_file;
2071 } else { /* open or lock stateid */ 2883 } else { /* open or lock stateid */
2072 if (!(stp = find_stateid(stateid, flags))) { 2884 stp = find_stateid(stateid, flags);
2073 dprintk("NFSD: open or lock stateid not found\n"); 2885 if (!stp)
2074 goto out; 2886 goto out;
2075 } 2887 if (nfs4_check_fh(current_fh, stp))
2076 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
2077 goto out; 2888 goto out;
2078 if (!stp->st_stateowner->so_confirmed) 2889 if (!stp->st_stateowner->so_confirmed)
2079 goto out; 2890 goto out;
2080 stidp = &stp->st_stateid; 2891 status = check_stateid_generation(stateid, &stp->st_stateid,
2081 } 2892 flags);
2082 status = check_stateid_generation(stateid, stidp); 2893 if (status)
2083 if (status) 2894 goto out;
2084 goto out; 2895 status = nfs4_check_openmode(stp, flags);
2085 if (stp) { 2896 if (status)
2086 if ((status = nfs4_check_openmode(stp,flags)))
2087 goto out; 2897 goto out;
2088 renew_client(stp->st_stateowner->so_client); 2898 renew_client(stp->st_stateowner->so_client);
2089 if (filpp) 2899 if (filpp)
2090 *filpp = stp->st_vfs_file; 2900 *filpp = stp->st_vfs_file;
2091 } else {
2092 if ((status = nfs4_check_delegmode(dp, flags)))
2093 goto out;
2094 renew_client(dp->dl_client);
2095 if (flags & DELEG_RET)
2096 unhash_delegation(dp);
2097 if (filpp)
2098 *filpp = dp->dl_vfs_file;
2099 } 2901 }
2100 status = nfs_ok; 2902 status = nfs_ok;
2101out: 2903out:
@@ -2113,10 +2915,14 @@ setlkflg (int type)
2113 * Checks for sequence id mutating operations. 2915 * Checks for sequence id mutating operations.
2114 */ 2916 */
2115static __be32 2917static __be32
2116nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) 2918nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2919 stateid_t *stateid, int flags,
2920 struct nfs4_stateowner **sopp,
2921 struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
2117{ 2922{
2118 struct nfs4_stateid *stp; 2923 struct nfs4_stateid *stp;
2119 struct nfs4_stateowner *sop; 2924 struct nfs4_stateowner *sop;
2925 struct svc_fh *current_fh = &cstate->current_fh;
2120 __be32 status; 2926 __be32 status;
2121 2927
2122 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2928 dprintk("NFSD: preprocess_seqid_op: seqid=%d "
@@ -2134,6 +2940,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2134 2940
2135 if (STALE_STATEID(stateid)) 2941 if (STALE_STATEID(stateid))
2136 return nfserr_stale_stateid; 2942 return nfserr_stale_stateid;
2943
2944 if (nfsd4_has_session(cstate))
2945 flags |= HAS_SESSION;
2946
2137 /* 2947 /*
2138 * We return BAD_STATEID if filehandle doesn't match stateid, 2948 * We return BAD_STATEID if filehandle doesn't match stateid,
2139 * the confirmed flag is incorrecly set, or the generation 2949 * the confirmed flag is incorrecly set, or the generation
@@ -2166,8 +2976,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2166 if (lock->lk_is_new) { 2976 if (lock->lk_is_new) {
2167 if (!sop->so_is_open_owner) 2977 if (!sop->so_is_open_owner)
2168 return nfserr_bad_stateid; 2978 return nfserr_bad_stateid;
2169 if (!same_clid(&clp->cl_clientid, lockclid)) 2979 if (!(flags & HAS_SESSION) &&
2170 return nfserr_bad_stateid; 2980 !same_clid(&clp->cl_clientid, lockclid))
2981 return nfserr_bad_stateid;
2171 /* stp is the open stateid */ 2982 /* stp is the open stateid */
2172 status = nfs4_check_openmode(stp, lkflg); 2983 status = nfs4_check_openmode(stp, lkflg);
2173 if (status) 2984 if (status)
@@ -2190,7 +3001,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2190 * For the moment, we ignore the possibility of 3001 * For the moment, we ignore the possibility of
2191 * generation number wraparound. 3002 * generation number wraparound.
2192 */ 3003 */
2193 if (seqid != sop->so_seqid) 3004 if (!(flags & HAS_SESSION) && seqid != sop->so_seqid)
2194 goto check_replay; 3005 goto check_replay;
2195 3006
2196 if (sop->so_confirmed && flags & CONFIRM) { 3007 if (sop->so_confirmed && flags & CONFIRM) {
@@ -2203,7 +3014,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2203 " confirmed yet!\n"); 3014 " confirmed yet!\n");
2204 return nfserr_bad_stateid; 3015 return nfserr_bad_stateid;
2205 } 3016 }
2206 status = check_stateid_generation(stateid, &stp->st_stateid); 3017 status = check_stateid_generation(stateid, &stp->st_stateid, flags);
2207 if (status) 3018 if (status)
2208 return status; 3019 return status;
2209 renew_client(sop->so_client); 3020 renew_client(sop->so_client);
@@ -2239,7 +3050,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2239 3050
2240 nfs4_lock_state(); 3051 nfs4_lock_state();
2241 3052
2242 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3053 if ((status = nfs4_preprocess_seqid_op(cstate,
2243 oc->oc_seqid, &oc->oc_req_stateid, 3054 oc->oc_seqid, &oc->oc_req_stateid,
2244 CONFIRM | OPEN_STATE, 3055 CONFIRM | OPEN_STATE,
2245 &oc->oc_stateowner, &stp, NULL))) 3056 &oc->oc_stateowner, &stp, NULL)))
@@ -2304,12 +3115,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
2304 (int)cstate->current_fh.fh_dentry->d_name.len, 3115 (int)cstate->current_fh.fh_dentry->d_name.len,
2305 cstate->current_fh.fh_dentry->d_name.name); 3116 cstate->current_fh.fh_dentry->d_name.name);
2306 3117
2307 if (!access_valid(od->od_share_access) 3118 if (!access_valid(od->od_share_access, cstate->minorversion)
2308 || !deny_valid(od->od_share_deny)) 3119 || !deny_valid(od->od_share_deny))
2309 return nfserr_inval; 3120 return nfserr_inval;
2310 3121
2311 nfs4_lock_state(); 3122 nfs4_lock_state();
2312 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3123 if ((status = nfs4_preprocess_seqid_op(cstate,
2313 od->od_seqid, 3124 od->od_seqid,
2314 &od->od_stateid, 3125 &od->od_stateid,
2315 OPEN_STATE, 3126 OPEN_STATE,
@@ -2362,7 +3173,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2362 3173
2363 nfs4_lock_state(); 3174 nfs4_lock_state();
2364 /* check close_lru for replay */ 3175 /* check close_lru for replay */
2365 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3176 if ((status = nfs4_preprocess_seqid_op(cstate,
2366 close->cl_seqid, 3177 close->cl_seqid,
2367 &close->cl_stateid, 3178 &close->cl_stateid,
2368 OPEN_STATE | CLOSE_STATE, 3179 OPEN_STATE | CLOSE_STATE,
@@ -2373,7 +3184,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2373 memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); 3184 memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
2374 3185
2375 /* release_stateid() calls nfsd_close() if needed */ 3186 /* release_stateid() calls nfsd_close() if needed */
2376 release_stateid(stp, OPEN_STATE); 3187 release_open_stateid(stp);
2377 3188
2378 /* place unused nfs4_stateowners on so_close_lru list to be 3189 /* place unused nfs4_stateowners on so_close_lru list to be
2379 * released by the laundromat service after the lease period 3190 * released by the laundromat service after the lease period
@@ -2394,16 +3205,40 @@ __be32
2394nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3205nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2395 struct nfsd4_delegreturn *dr) 3206 struct nfsd4_delegreturn *dr)
2396{ 3207{
3208 struct nfs4_delegation *dp;
3209 stateid_t *stateid = &dr->dr_stateid;
3210 struct inode *inode;
2397 __be32 status; 3211 __be32 status;
3212 int flags = 0;
2398 3213
2399 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 3214 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
2400 goto out; 3215 return status;
3216 inode = cstate->current_fh.fh_dentry->d_inode;
2401 3217
3218 if (nfsd4_has_session(cstate))
3219 flags |= HAS_SESSION;
2402 nfs4_lock_state(); 3220 nfs4_lock_state();
2403 status = nfs4_preprocess_stateid_op(&cstate->current_fh, 3221 status = nfserr_bad_stateid;
2404 &dr->dr_stateid, DELEG_RET, NULL); 3222 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
2405 nfs4_unlock_state(); 3223 goto out;
3224 status = nfserr_stale_stateid;
3225 if (STALE_STATEID(stateid))
3226 goto out;
3227 status = nfserr_bad_stateid;
3228 if (!is_delegation_stateid(stateid))
3229 goto out;
3230 dp = find_delegation_stateid(inode, stateid);
3231 if (!dp)
3232 goto out;
3233 status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
3234 if (status)
3235 goto out;
3236 renew_client(dp->dl_client);
3237
3238 unhash_delegation(dp);
2406out: 3239out:
3240 nfs4_unlock_state();
3241
2407 return status; 3242 return status;
2408} 3243}
2409 3244
@@ -2684,11 +3519,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2684 struct nfs4_file *fp; 3519 struct nfs4_file *fp;
2685 3520
2686 status = nfserr_stale_clientid; 3521 status = nfserr_stale_clientid;
2687 if (STALE_CLIENTID(&lock->lk_new_clientid)) 3522 if (!nfsd4_has_session(cstate) &&
3523 STALE_CLIENTID(&lock->lk_new_clientid))
2688 goto out; 3524 goto out;
2689 3525
2690 /* validate and update open stateid and open seqid */ 3526 /* validate and update open stateid and open seqid */
2691 status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3527 status = nfs4_preprocess_seqid_op(cstate,
2692 lock->lk_new_open_seqid, 3528 lock->lk_new_open_seqid,
2693 &lock->lk_new_open_stateid, 3529 &lock->lk_new_open_stateid,
2694 OPEN_STATE, 3530 OPEN_STATE,
@@ -2715,7 +3551,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2715 goto out; 3551 goto out;
2716 } else { 3552 } else {
2717 /* lock (lock owner + lock stateid) already exists */ 3553 /* lock (lock owner + lock stateid) already exists */
2718 status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3554 status = nfs4_preprocess_seqid_op(cstate,
2719 lock->lk_old_lock_seqid, 3555 lock->lk_old_lock_seqid,
2720 &lock->lk_old_lock_stateid, 3556 &lock->lk_old_lock_stateid,
2721 LOCK_STATE, 3557 LOCK_STATE,
@@ -2788,7 +3624,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2788 } 3624 }
2789out: 3625out:
2790 if (status && lock->lk_is_new && lock_sop) 3626 if (status && lock->lk_is_new && lock_sop)
2791 release_stateowner(lock_sop); 3627 release_lockowner(lock_sop);
2792 if (lock->lk_replay_owner) { 3628 if (lock->lk_replay_owner) {
2793 nfs4_get_stateowner(lock->lk_replay_owner); 3629 nfs4_get_stateowner(lock->lk_replay_owner);
2794 cstate->replay_owner = lock->lk_replay_owner; 3630 cstate->replay_owner = lock->lk_replay_owner;
@@ -2838,7 +3674,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2838 nfs4_lock_state(); 3674 nfs4_lock_state();
2839 3675
2840 status = nfserr_stale_clientid; 3676 status = nfserr_stale_clientid;
2841 if (STALE_CLIENTID(&lockt->lt_clientid)) 3677 if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
2842 goto out; 3678 goto out;
2843 3679
2844 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { 3680 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) {
@@ -2911,7 +3747,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2911 3747
2912 nfs4_lock_state(); 3748 nfs4_lock_state();
2913 3749
2914 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 3750 if ((status = nfs4_preprocess_seqid_op(cstate,
2915 locku->lu_seqid, 3751 locku->lu_seqid,
2916 &locku->lu_stateid, 3752 &locku->lu_stateid,
2917 LOCK_STATE, 3753 LOCK_STATE,
@@ -3037,7 +3873,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
3037 /* unhash_stateowner deletes so_perclient only 3873 /* unhash_stateowner deletes so_perclient only
3038 * for openowners. */ 3874 * for openowners. */
3039 list_del(&sop->so_perclient); 3875 list_del(&sop->so_perclient);
3040 release_stateowner(sop); 3876 release_lockowner(sop);
3041 } 3877 }
3042out: 3878out:
3043 nfs4_unlock_state(); 3879 nfs4_unlock_state();
@@ -3051,12 +3887,12 @@ alloc_reclaim(void)
3051} 3887}
3052 3888
3053int 3889int
3054nfs4_has_reclaimed_state(const char *name) 3890nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
3055{ 3891{
3056 unsigned int strhashval = clientstr_hashval(name); 3892 unsigned int strhashval = clientstr_hashval(name);
3057 struct nfs4_client *clp; 3893 struct nfs4_client *clp;
3058 3894
3059 clp = find_confirmed_client_by_str(name, strhashval); 3895 clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id);
3060 return clp ? 1 : 0; 3896 return clp ? 1 : 0;
3061} 3897}
3062 3898
@@ -3153,6 +3989,8 @@ nfs4_state_init(void)
3153 INIT_LIST_HEAD(&unconf_str_hashtbl[i]); 3989 INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
3154 INIT_LIST_HEAD(&unconf_id_hashtbl[i]); 3990 INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
3155 } 3991 }
3992 for (i = 0; i < SESSION_HASH_SIZE; i++)
3993 INIT_LIST_HEAD(&sessionid_hashtbl[i]);
3156 for (i = 0; i < FILE_HASH_SIZE; i++) { 3994 for (i = 0; i < FILE_HASH_SIZE; i++) {
3157 INIT_LIST_HEAD(&file_hashtbl[i]); 3995 INIT_LIST_HEAD(&file_hashtbl[i]);
3158 } 3996 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9250067943d8..b820c311931c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -45,6 +45,7 @@
45#include <linux/fs.h> 45#include <linux/fs.h>
46#include <linux/namei.h> 46#include <linux/namei.h>
47#include <linux/vfs.h> 47#include <linux/vfs.h>
48#include <linux/utsname.h>
48#include <linux/sunrpc/xdr.h> 49#include <linux/sunrpc/xdr.h>
49#include <linux/sunrpc/svc.h> 50#include <linux/sunrpc/svc.h>
50#include <linux/sunrpc/clnt.h> 51#include <linux/sunrpc/clnt.h>
@@ -188,6 +189,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
188 return p; 189 return p;
189} 190}
190 191
192static int zero_clientid(clientid_t *clid)
193{
194 return (clid->cl_boot == 0) && (clid->cl_id == 0);
195}
196
191static int 197static int
192defer_free(struct nfsd4_compoundargs *argp, 198defer_free(struct nfsd4_compoundargs *argp,
193 void (*release)(const void *), void *p) 199 void (*release)(const void *), void *p)
@@ -230,6 +236,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
230 236
231 bmval[0] = 0; 237 bmval[0] = 0;
232 bmval[1] = 0; 238 bmval[1] = 0;
239 bmval[2] = 0;
233 240
234 READ_BUF(4); 241 READ_BUF(4);
235 READ32(bmlen); 242 READ32(bmlen);
@@ -241,13 +248,27 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
241 READ32(bmval[0]); 248 READ32(bmval[0]);
242 if (bmlen > 1) 249 if (bmlen > 1)
243 READ32(bmval[1]); 250 READ32(bmval[1]);
251 if (bmlen > 2)
252 READ32(bmval[2]);
244 253
245 DECODE_TAIL; 254 DECODE_TAIL;
246} 255}
247 256
257static u32 nfsd_attrmask[] = {
258 NFSD_WRITEABLE_ATTRS_WORD0,
259 NFSD_WRITEABLE_ATTRS_WORD1,
260 NFSD_WRITEABLE_ATTRS_WORD2
261};
262
263static u32 nfsd41_ex_attrmask[] = {
264 NFSD_SUPPATTR_EXCLCREAT_WORD0,
265 NFSD_SUPPATTR_EXCLCREAT_WORD1,
266 NFSD_SUPPATTR_EXCLCREAT_WORD2
267};
268
248static __be32 269static __be32
249nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, 270nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
250 struct nfs4_acl **acl) 271 struct iattr *iattr, struct nfs4_acl **acl)
251{ 272{
252 int expected_len, len = 0; 273 int expected_len, len = 0;
253 u32 dummy32; 274 u32 dummy32;
@@ -263,9 +284,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
263 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; 284 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP;
264 * read-only attributes return ERR_INVAL. 285 * read-only attributes return ERR_INVAL.
265 */ 286 */
266 if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) 287 if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) ||
288 (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) ||
289 (bmval[2] & ~nfsd_suppattrs2(argp->minorversion)))
267 return nfserr_attrnotsupp; 290 return nfserr_attrnotsupp;
268 if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) 291 if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
292 (bmval[2] & ~writable[2]))
269 return nfserr_inval; 293 return nfserr_inval;
270 294
271 READ_BUF(4); 295 READ_BUF(4);
@@ -400,6 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
400 goto xdr_error; 424 goto xdr_error;
401 } 425 }
402 } 426 }
427 BUG_ON(bmval[2]); /* no such writeable attr supported yet */
403 if (len != expected_len) 428 if (len != expected_len)
404 goto xdr_error; 429 goto xdr_error;
405 430
@@ -493,7 +518,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
493 if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) 518 if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
494 return status; 519 return status;
495 520
496 if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) 521 status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask,
522 &create->cr_iattr, &create->cr_acl);
523 if (status)
497 goto out; 524 goto out;
498 525
499 DECODE_TAIL; 526 DECODE_TAIL;
@@ -583,6 +610,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
583 READ_BUF(lockt->lt_owner.len); 610 READ_BUF(lockt->lt_owner.len);
584 READMEM(lockt->lt_owner.data, lockt->lt_owner.len); 611 READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
585 612
613 if (argp->minorversion && !zero_clientid(&lockt->lt_clientid))
614 return nfserr_inval;
586 DECODE_TAIL; 615 DECODE_TAIL;
587} 616}
588 617
@@ -652,13 +681,26 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
652 switch (open->op_createmode) { 681 switch (open->op_createmode) {
653 case NFS4_CREATE_UNCHECKED: 682 case NFS4_CREATE_UNCHECKED:
654 case NFS4_CREATE_GUARDED: 683 case NFS4_CREATE_GUARDED:
655 if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) 684 status = nfsd4_decode_fattr(argp, open->op_bmval,
685 nfsd_attrmask, &open->op_iattr, &open->op_acl);
686 if (status)
656 goto out; 687 goto out;
657 break; 688 break;
658 case NFS4_CREATE_EXCLUSIVE: 689 case NFS4_CREATE_EXCLUSIVE:
659 READ_BUF(8); 690 READ_BUF(8);
660 COPYMEM(open->op_verf.data, 8); 691 COPYMEM(open->op_verf.data, 8);
661 break; 692 break;
693 case NFS4_CREATE_EXCLUSIVE4_1:
694 if (argp->minorversion < 1)
695 goto xdr_error;
696 READ_BUF(8);
697 COPYMEM(open->op_verf.data, 8);
698 status = nfsd4_decode_fattr(argp, open->op_bmval,
699 nfsd41_ex_attrmask, &open->op_iattr,
700 &open->op_acl);
701 if (status)
702 goto out;
703 break;
662 default: 704 default:
663 goto xdr_error; 705 goto xdr_error;
664 } 706 }
@@ -851,7 +893,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
851 status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); 893 status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
852 if (status) 894 if (status)
853 return status; 895 return status;
854 return nfsd4_decode_fattr(argp, setattr->sa_bmval, 896 return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask,
855 &setattr->sa_iattr, &setattr->sa_acl); 897 &setattr->sa_iattr, &setattr->sa_acl);
856} 898}
857 899
@@ -993,6 +1035,241 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
993 READ_BUF(rlockowner->rl_owner.len); 1035 READ_BUF(rlockowner->rl_owner.len);
994 READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); 1036 READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
995 1037
1038 if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
1039 return nfserr_inval;
1040 DECODE_TAIL;
1041}
1042
1043static __be32
1044nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
1045 struct nfsd4_exchange_id *exid)
1046{
1047 int dummy;
1048 DECODE_HEAD;
1049
1050 READ_BUF(NFS4_VERIFIER_SIZE);
1051 COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
1052
1053 READ_BUF(4);
1054 READ32(exid->clname.len);
1055
1056 READ_BUF(exid->clname.len);
1057 SAVEMEM(exid->clname.data, exid->clname.len);
1058
1059 READ_BUF(4);
1060 READ32(exid->flags);
1061
1062 /* Ignore state_protect4_a */
1063 READ_BUF(4);
1064 READ32(exid->spa_how);
1065 switch (exid->spa_how) {
1066 case SP4_NONE:
1067 break;
1068 case SP4_MACH_CRED:
1069 /* spo_must_enforce */
1070 READ_BUF(4);
1071 READ32(dummy);
1072 READ_BUF(dummy * 4);
1073 p += dummy;
1074
1075 /* spo_must_allow */
1076 READ_BUF(4);
1077 READ32(dummy);
1078 READ_BUF(dummy * 4);
1079 p += dummy;
1080 break;
1081 case SP4_SSV:
1082 /* ssp_ops */
1083 READ_BUF(4);
1084 READ32(dummy);
1085 READ_BUF(dummy * 4);
1086 p += dummy;
1087
1088 READ_BUF(4);
1089 READ32(dummy);
1090 READ_BUF(dummy * 4);
1091 p += dummy;
1092
1093 /* ssp_hash_algs<> */
1094 READ_BUF(4);
1095 READ32(dummy);
1096 READ_BUF(dummy);
1097 p += XDR_QUADLEN(dummy);
1098
1099 /* ssp_encr_algs<> */
1100 READ_BUF(4);
1101 READ32(dummy);
1102 READ_BUF(dummy);
1103 p += XDR_QUADLEN(dummy);
1104
1105 /* ssp_window and ssp_num_gss_handles */
1106 READ_BUF(8);
1107 READ32(dummy);
1108 READ32(dummy);
1109 break;
1110 default:
1111 goto xdr_error;
1112 }
1113
1114 /* Ignore Implementation ID */
1115 READ_BUF(4); /* nfs_impl_id4 array length */
1116 READ32(dummy);
1117
1118 if (dummy > 1)
1119 goto xdr_error;
1120
1121 if (dummy == 1) {
1122 /* nii_domain */
1123 READ_BUF(4);
1124 READ32(dummy);
1125 READ_BUF(dummy);
1126 p += XDR_QUADLEN(dummy);
1127
1128 /* nii_name */
1129 READ_BUF(4);
1130 READ32(dummy);
1131 READ_BUF(dummy);
1132 p += XDR_QUADLEN(dummy);
1133
1134 /* nii_date */
1135 READ_BUF(12);
1136 p += 3;
1137 }
1138 DECODE_TAIL;
1139}
1140
1141static __be32
1142nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1143 struct nfsd4_create_session *sess)
1144{
1145 DECODE_HEAD;
1146
1147 u32 dummy;
1148 char *machine_name;
1149 int i;
1150 int nr_secflavs;
1151
1152 READ_BUF(16);
1153 COPYMEM(&sess->clientid, 8);
1154 READ32(sess->seqid);
1155 READ32(sess->flags);
1156
1157 /* Fore channel attrs */
1158 READ_BUF(28);
1159 READ32(dummy); /* headerpadsz is always 0 */
1160 READ32(sess->fore_channel.maxreq_sz);
1161 READ32(sess->fore_channel.maxresp_sz);
1162 READ32(sess->fore_channel.maxresp_cached);
1163 READ32(sess->fore_channel.maxops);
1164 READ32(sess->fore_channel.maxreqs);
1165 READ32(sess->fore_channel.nr_rdma_attrs);
1166 if (sess->fore_channel.nr_rdma_attrs == 1) {
1167 READ_BUF(4);
1168 READ32(sess->fore_channel.rdma_attrs);
1169 } else if (sess->fore_channel.nr_rdma_attrs > 1) {
1170 dprintk("Too many fore channel attr bitmaps!\n");
1171 goto xdr_error;
1172 }
1173
1174 /* Back channel attrs */
1175 READ_BUF(28);
1176 READ32(dummy); /* headerpadsz is always 0 */
1177 READ32(sess->back_channel.maxreq_sz);
1178 READ32(sess->back_channel.maxresp_sz);
1179 READ32(sess->back_channel.maxresp_cached);
1180 READ32(sess->back_channel.maxops);
1181 READ32(sess->back_channel.maxreqs);
1182 READ32(sess->back_channel.nr_rdma_attrs);
1183 if (sess->back_channel.nr_rdma_attrs == 1) {
1184 READ_BUF(4);
1185 READ32(sess->back_channel.rdma_attrs);
1186 } else if (sess->back_channel.nr_rdma_attrs > 1) {
1187 dprintk("Too many back channel attr bitmaps!\n");
1188 goto xdr_error;
1189 }
1190
1191 READ_BUF(8);
1192 READ32(sess->callback_prog);
1193
1194 /* callback_sec_params4 */
1195 READ32(nr_secflavs);
1196 for (i = 0; i < nr_secflavs; ++i) {
1197 READ_BUF(4);
1198 READ32(dummy);
1199 switch (dummy) {
1200 case RPC_AUTH_NULL:
1201 /* Nothing to read */
1202 break;
1203 case RPC_AUTH_UNIX:
1204 READ_BUF(8);
1205 /* stamp */
1206 READ32(dummy);
1207
1208 /* machine name */
1209 READ32(dummy);
1210 READ_BUF(dummy);
1211 SAVEMEM(machine_name, dummy);
1212
1213 /* uid, gid */
1214 READ_BUF(8);
1215 READ32(sess->uid);
1216 READ32(sess->gid);
1217
1218 /* more gids */
1219 READ_BUF(4);
1220 READ32(dummy);
1221 READ_BUF(dummy * 4);
1222 for (i = 0; i < dummy; ++i)
1223 READ32(dummy);
1224 break;
1225 case RPC_AUTH_GSS:
1226 dprintk("RPC_AUTH_GSS callback secflavor "
1227 "not supported!\n");
1228 READ_BUF(8);
1229 /* gcbp_service */
1230 READ32(dummy);
1231 /* gcbp_handle_from_server */
1232 READ32(dummy);
1233 READ_BUF(dummy);
1234 p += XDR_QUADLEN(dummy);
1235 /* gcbp_handle_from_client */
1236 READ_BUF(4);
1237 READ32(dummy);
1238 READ_BUF(dummy);
1239 p += XDR_QUADLEN(dummy);
1240 break;
1241 default:
1242 dprintk("Illegal callback secflavor\n");
1243 return nfserr_inval;
1244 }
1245 }
1246 DECODE_TAIL;
1247}
1248
1249static __be32
1250nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
1251 struct nfsd4_destroy_session *destroy_session)
1252{
1253 DECODE_HEAD;
1254 READ_BUF(NFS4_MAX_SESSIONID_LEN);
1255 COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN);
1256
1257 DECODE_TAIL;
1258}
1259
1260static __be32
1261nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
1262 struct nfsd4_sequence *seq)
1263{
1264 DECODE_HEAD;
1265
1266 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
1267 COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
1268 READ32(seq->seqid);
1269 READ32(seq->slotid);
1270 READ32(seq->maxslots);
1271 READ32(seq->cachethis);
1272
996 DECODE_TAIL; 1273 DECODE_TAIL;
997} 1274}
998 1275
@@ -1005,7 +1282,7 @@ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
1005static __be32 1282static __be32
1006nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) 1283nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
1007{ 1284{
1008 return nfserr_opnotsupp; 1285 return nfserr_notsupp;
1009} 1286}
1010 1287
1011typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); 1288typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
@@ -1031,7 +1308,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1031 [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, 1308 [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
1032 [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, 1309 [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
1033 [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, 1310 [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
1034 [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, 1311 [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop,
1035 [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, 1312 [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
1036 [OP_READ] = (nfsd4_dec)nfsd4_decode_read, 1313 [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
1037 [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, 1314 [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
@@ -1050,6 +1327,67 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1050 [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, 1327 [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
1051}; 1328};
1052 1329
1330static nfsd4_dec nfsd41_dec_ops[] = {
1331 [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access,
1332 [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close,
1333 [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit,
1334 [OP_CREATE] (nfsd4_dec)nfsd4_decode_create,
1335 [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp,
1336 [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn,
1337 [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr,
1338 [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop,
1339 [OP_LINK] (nfsd4_dec)nfsd4_decode_link,
1340 [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock,
1341 [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt,
1342 [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku,
1343 [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup,
1344 [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop,
1345 [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify,
1346 [OP_OPEN] (nfsd4_dec)nfsd4_decode_open,
1347 [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp,
1348 [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp,
1349 [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade,
1350 [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh,
1351 [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp,
1352 [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop,
1353 [OP_READ] (nfsd4_dec)nfsd4_decode_read,
1354 [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir,
1355 [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop,
1356 [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove,
1357 [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename,
1358 [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp,
1359 [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop,
1360 [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop,
1361 [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo,
1362 [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr,
1363 [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp,
1364 [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp,
1365 [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify,
1366 [OP_WRITE] (nfsd4_dec)nfsd4_decode_write,
1367 [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp,
1368
1369 /* new operations for NFSv4.1 */
1370 [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp,
1371 [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp,
1372 [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id,
1373 [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session,
1374 [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session,
1375 [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp,
1376 [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp,
1377 [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp,
1378 [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp,
1379 [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp,
1380 [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp,
1381 [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp,
1382 [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp,
1383 [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence,
1384 [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp,
1385 [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp,
1386 [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp,
1387 [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp,
1388 [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp,
1389};
1390
1053struct nfsd4_minorversion_ops { 1391struct nfsd4_minorversion_ops {
1054 nfsd4_dec *decoders; 1392 nfsd4_dec *decoders;
1055 int nops; 1393 int nops;
@@ -1057,6 +1395,7 @@ struct nfsd4_minorversion_ops {
1057 1395
1058static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { 1396static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
1059 [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, 1397 [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
1398 [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
1060}; 1399};
1061 1400
1062static __be32 1401static __be32
@@ -1412,6 +1751,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1412{ 1751{
1413 u32 bmval0 = bmval[0]; 1752 u32 bmval0 = bmval[0];
1414 u32 bmval1 = bmval[1]; 1753 u32 bmval1 = bmval[1];
1754 u32 bmval2 = bmval[2];
1415 struct kstat stat; 1755 struct kstat stat;
1416 struct svc_fh tempfh; 1756 struct svc_fh tempfh;
1417 struct kstatfs statfs; 1757 struct kstatfs statfs;
@@ -1425,12 +1765,16 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1425 int err; 1765 int err;
1426 int aclsupport = 0; 1766 int aclsupport = 0;
1427 struct nfs4_acl *acl = NULL; 1767 struct nfs4_acl *acl = NULL;
1768 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1769 u32 minorversion = resp->cstate.minorversion;
1428 1770
1429 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); 1771 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
1430 BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); 1772 BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
1431 BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); 1773 BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion));
1774 BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion));
1432 1775
1433 if (exp->ex_fslocs.migrated) { 1776 if (exp->ex_fslocs.migrated) {
1777 BUG_ON(bmval[2]);
1434 status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); 1778 status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
1435 if (status) 1779 if (status)
1436 goto out; 1780 goto out;
@@ -1476,22 +1820,42 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1476 if ((buflen -= 16) < 0) 1820 if ((buflen -= 16) < 0)
1477 goto out_resource; 1821 goto out_resource;
1478 1822
1479 WRITE32(2); 1823 if (unlikely(bmval2)) {
1480 WRITE32(bmval0); 1824 WRITE32(3);
1481 WRITE32(bmval1); 1825 WRITE32(bmval0);
1826 WRITE32(bmval1);
1827 WRITE32(bmval2);
1828 } else if (likely(bmval1)) {
1829 WRITE32(2);
1830 WRITE32(bmval0);
1831 WRITE32(bmval1);
1832 } else {
1833 WRITE32(1);
1834 WRITE32(bmval0);
1835 }
1482 attrlenp = p++; /* to be backfilled later */ 1836 attrlenp = p++; /* to be backfilled later */
1483 1837
1484 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { 1838 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
1485 u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; 1839 u32 word0 = nfsd_suppattrs0(minorversion);
1840 u32 word1 = nfsd_suppattrs1(minorversion);
1841 u32 word2 = nfsd_suppattrs2(minorversion);
1842
1486 if ((buflen -= 12) < 0) 1843 if ((buflen -= 12) < 0)
1487 goto out_resource; 1844 goto out_resource;
1488 if (!aclsupport) 1845 if (!aclsupport)
1489 word0 &= ~FATTR4_WORD0_ACL; 1846 word0 &= ~FATTR4_WORD0_ACL;
1490 if (!exp->ex_fslocs.locations) 1847 if (!exp->ex_fslocs.locations)
1491 word0 &= ~FATTR4_WORD0_FS_LOCATIONS; 1848 word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1492 WRITE32(2); 1849 if (!word2) {
1493 WRITE32(word0); 1850 WRITE32(2);
1494 WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); 1851 WRITE32(word0);
1852 WRITE32(word1);
1853 } else {
1854 WRITE32(3);
1855 WRITE32(word0);
1856 WRITE32(word1);
1857 WRITE32(word2);
1858 }
1495 } 1859 }
1496 if (bmval0 & FATTR4_WORD0_TYPE) { 1860 if (bmval0 & FATTR4_WORD0_TYPE) {
1497 if ((buflen -= 4) < 0) 1861 if ((buflen -= 4) < 0)
@@ -1801,6 +2165,13 @@ out_acl:
1801 } 2165 }
1802 WRITE64(stat.ino); 2166 WRITE64(stat.ino);
1803 } 2167 }
2168 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2169 WRITE32(3);
2170 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
2171 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
2172 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
2173 }
2174
1804 *attrlenp = htonl((char *)p - (char *)attrlenp - 4); 2175 *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
1805 *countp = p - buffer; 2176 *countp = p - buffer;
1806 status = nfs_ok; 2177 status = nfs_ok;
@@ -2572,6 +2943,143 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
2572} 2943}
2573 2944
2574static __be32 2945static __be32
2946nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr,
2947 struct nfsd4_exchange_id *exid)
2948{
2949 ENCODE_HEAD;
2950 char *major_id;
2951 char *server_scope;
2952 int major_id_sz;
2953 int server_scope_sz;
2954 uint64_t minor_id = 0;
2955
2956 if (nfserr)
2957 return nfserr;
2958
2959 major_id = utsname()->nodename;
2960 major_id_sz = strlen(major_id);
2961 server_scope = utsname()->nodename;
2962 server_scope_sz = strlen(server_scope);
2963
2964 RESERVE_SPACE(
2965 8 /* eir_clientid */ +
2966 4 /* eir_sequenceid */ +
2967 4 /* eir_flags */ +
2968 4 /* spr_how (SP4_NONE) */ +
2969 8 /* so_minor_id */ +
2970 4 /* so_major_id.len */ +
2971 (XDR_QUADLEN(major_id_sz) * 4) +
2972 4 /* eir_server_scope.len */ +
2973 (XDR_QUADLEN(server_scope_sz) * 4) +
2974 4 /* eir_server_impl_id.count (0) */);
2975
2976 WRITEMEM(&exid->clientid, 8);
2977 WRITE32(exid->seqid);
2978 WRITE32(exid->flags);
2979
2980 /* state_protect4_r. Currently only support SP4_NONE */
2981 BUG_ON(exid->spa_how != SP4_NONE);
2982 WRITE32(exid->spa_how);
2983
2984 /* The server_owner struct */
2985 WRITE64(minor_id); /* Minor id */
2986 /* major id */
2987 WRITE32(major_id_sz);
2988 WRITEMEM(major_id, major_id_sz);
2989
2990 /* Server scope */
2991 WRITE32(server_scope_sz);
2992 WRITEMEM(server_scope, server_scope_sz);
2993
2994 /* Implementation id */
2995 WRITE32(0); /* zero length nfs_impl_id4 array */
2996 ADJUST_ARGS();
2997 return 0;
2998}
2999
3000static __be32
3001nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr,
3002 struct nfsd4_create_session *sess)
3003{
3004 ENCODE_HEAD;
3005
3006 if (nfserr)
3007 return nfserr;
3008
3009 RESERVE_SPACE(24);
3010 WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN);
3011 WRITE32(sess->seqid);
3012 WRITE32(sess->flags);
3013 ADJUST_ARGS();
3014
3015 RESERVE_SPACE(28);
3016 WRITE32(0); /* headerpadsz */
3017 WRITE32(sess->fore_channel.maxreq_sz);
3018 WRITE32(sess->fore_channel.maxresp_sz);
3019 WRITE32(sess->fore_channel.maxresp_cached);
3020 WRITE32(sess->fore_channel.maxops);
3021 WRITE32(sess->fore_channel.maxreqs);
3022 WRITE32(sess->fore_channel.nr_rdma_attrs);
3023 ADJUST_ARGS();
3024
3025 if (sess->fore_channel.nr_rdma_attrs) {
3026 RESERVE_SPACE(4);
3027 WRITE32(sess->fore_channel.rdma_attrs);
3028 ADJUST_ARGS();
3029 }
3030
3031 RESERVE_SPACE(28);
3032 WRITE32(0); /* headerpadsz */
3033 WRITE32(sess->back_channel.maxreq_sz);
3034 WRITE32(sess->back_channel.maxresp_sz);
3035 WRITE32(sess->back_channel.maxresp_cached);
3036 WRITE32(sess->back_channel.maxops);
3037 WRITE32(sess->back_channel.maxreqs);
3038 WRITE32(sess->back_channel.nr_rdma_attrs);
3039 ADJUST_ARGS();
3040
3041 if (sess->back_channel.nr_rdma_attrs) {
3042 RESERVE_SPACE(4);
3043 WRITE32(sess->back_channel.rdma_attrs);
3044 ADJUST_ARGS();
3045 }
3046 return 0;
3047}
3048
3049static __be32
3050nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr,
3051 struct nfsd4_destroy_session *destroy_session)
3052{
3053 return nfserr;
3054}
3055
/*
 * Encode the result of an OP_SEQUENCE operation: the session id followed by
 * seqid, slotid, maxslots, a second copy of maxslots and a zero word.
 * A non-zero nfserr is returned as-is and nothing is encoded; otherwise the
 * function returns 0.
 *
 * NOTE(review): nfserr is an int here while the function returns __be32 and
 * nfsd4_enc takes a __be32 status.  The function is not static, so any
 * prototype outside this view must be changed together with it.
 */
3056__be32
3057nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3058 struct nfsd4_sequence *seq)
3059{
3060 ENCODE_HEAD;
3061
3062 if (nfserr)
3063 return nfserr;
3064
 /* session id plus 20 bytes for the five WRITE32 calls below */
3065 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20);
3066 WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
3067 WRITE32(seq->seqid);
3068 WRITE32(seq->slotid);
3069 WRITE32(seq->maxslots);
3070 /*
3071 * FIXME: for now:
3072 * target_maxslots = maxslots
3073 * status_flags = 0
3074 */
3075 WRITE32(seq->maxslots);
3076 WRITE32(0);
3077
3078 ADJUST_ARGS();
3079 return 0;
3080}
3081
3082static __be32
2575nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) 3083nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
2576{ 3084{
2577 return nfserr; 3085 return nfserr;
@@ -2579,6 +3087,11 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
2579 3087
2580typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); 3088typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
2581 3089
3090/*
3091 * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
3092 * since we don't need to filter out obsolete ops as this is
3093 * done in the decoding phase.
3094 */
2582static nfsd4_enc nfsd4_enc_ops[] = { 3095static nfsd4_enc nfsd4_enc_ops[] = {
2583 [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, 3096 [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
2584 [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, 3097 [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
@@ -2617,8 +3130,77 @@ static nfsd4_enc nfsd4_enc_ops[] = {
2617 [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, 3130 [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2618 [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, 3131 [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
2619 [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, 3132 [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
3133
3134 /* NFSv4.1 operations */
3135 [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
3136 [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
3137 [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
3138 [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
3139 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
3140 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3141 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
3142 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
3143 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
3144 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
3145 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
3146 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
3147 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop,
3148 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
3149 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
3150 [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3151 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
3152 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
3153 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
2620}; 3154};
2621 3155
3156/*
3157 * Calculate the total amount of memory that the compound response has taken
3158 * after encoding the current operation.
3159 *
3160 * pad: add on 8 bytes for the next operation's op_code and status so that
3161 * there is room to cache a failure on the next operation.
3162 *
3163 * Compare this length to the session se_fmaxresp_cached.
3164 *
3165 * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
3166 * will be at least a page and will therefore hold the xdr_buf head.
3167 */
3168static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
3169{
3170 int status = 0;
3171 struct xdr_buf *xb = &resp->rqstp->rq_res;
3172 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
3173 struct nfsd4_session *session = NULL;
3174 struct nfsd4_slot *slot = resp->cstate.slot;
3175 u32 length, tlen = 0, pad = 8;
3176
3177 if (!nfsd4_has_session(&resp->cstate))
3178 return status;
3179
3180 session = resp->cstate.session;
3181 if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0)
3182 return status;
3183
3184 if (resp->opcnt >= args->opcnt)
3185 pad = 0; /* this is the last operation */
3186
3187 if (xb->page_len == 0) {
3188 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
3189 } else {
3190 if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
3191 tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;
3192
3193 length = xb->head[0].iov_len + xb->page_len + tlen + pad;
3194 }
3195 dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
3196 length, xb->page_len, tlen, pad);
3197
3198 if (length <= session->se_fmaxresp_cached)
3199 return status;
3200 else
3201 return nfserr_rep_too_big_to_cache;
3202}
3203
2622void 3204void
2623nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) 3205nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2624{ 3206{
@@ -2635,6 +3217,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2635 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || 3217 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
2636 !nfsd4_enc_ops[op->opnum]); 3218 !nfsd4_enc_ops[op->opnum]);
2637 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); 3219 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
3220 /* nfsd4_check_drc_limit guarantees enough room for error status */
3221 if (!op->status && nfsd4_check_drc_limit(resp))
3222 op->status = nfserr_rep_too_big_to_cache;
2638status: 3223status:
2639 /* 3224 /*
2640 * Note: We write the status directly, instead of using WRITE32(), 3225 * Note: We write the status directly, instead of using WRITE32(),
@@ -2735,6 +3320,18 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
2735 iov = &rqstp->rq_res.head[0]; 3320 iov = &rqstp->rq_res.head[0];
2736 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3321 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
2737 BUG_ON(iov->iov_len > PAGE_SIZE); 3322 BUG_ON(iov->iov_len > PAGE_SIZE);
3323 if (nfsd4_has_session(&resp->cstate)) {
3324 if (resp->cstate.status == nfserr_replay_cache &&
3325 !nfsd4_not_cached(resp)) {
3326 iov->iov_len = resp->cstate.iovlen;
3327 } else {
3328 nfsd4_store_cache_entry(resp);
3329 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3330 resp->cstate.slot->sl_inuse = 0;
3331 }
3332 if (resp->cstate.session)
3333 nfsd4_put_session(resp->cstate.session);
3334 }
2738 return 1; 3335 return 1;
2739} 3336}
2740 3337
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index a4ed8644d69c..af16849d243a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -60,6 +60,7 @@ enum {
60 NFSD_FO_UnlockFS, 60 NFSD_FO_UnlockFS,
61 NFSD_Threads, 61 NFSD_Threads,
62 NFSD_Pool_Threads, 62 NFSD_Pool_Threads,
63 NFSD_Pool_Stats,
63 NFSD_Versions, 64 NFSD_Versions,
64 NFSD_Ports, 65 NFSD_Ports,
65 NFSD_MaxBlkSize, 66 NFSD_MaxBlkSize,
@@ -172,6 +173,16 @@ static const struct file_operations exports_operations = {
172 .owner = THIS_MODULE, 173 .owner = THIS_MODULE,
173}; 174};
174 175
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177
178static struct file_operations pool_stats_operations = {
179 .open = nfsd_pool_stats_open,
180 .read = seq_read,
181 .llseek = seq_lseek,
182 .release = seq_release,
183 .owner = THIS_MODULE,
184};
185
175/*----------------------------------------------------------------------------*/ 186/*----------------------------------------------------------------------------*/
176/* 187/*
177 * payload - write methods 188 * payload - write methods
@@ -781,8 +792,9 @@ out_free:
781static ssize_t __write_versions(struct file *file, char *buf, size_t size) 792static ssize_t __write_versions(struct file *file, char *buf, size_t size)
782{ 793{
783 char *mesg = buf; 794 char *mesg = buf;
784 char *vers, sign; 795 char *vers, *minorp, sign;
785 int len, num; 796 int len, num;
797 unsigned minor;
786 ssize_t tlen = 0; 798 ssize_t tlen = 0;
787 char *sep; 799 char *sep;
788 800
@@ -803,9 +815,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
803 do { 815 do {
804 sign = *vers; 816 sign = *vers;
805 if (sign == '+' || sign == '-') 817 if (sign == '+' || sign == '-')
806 num = simple_strtol((vers+1), NULL, 0); 818 num = simple_strtol((vers+1), &minorp, 0);
807 else 819 else
808 num = simple_strtol(vers, NULL, 0); 820 num = simple_strtol(vers, &minorp, 0);
821 if (*minorp == '.') {
822 if (num < 4)
823 return -EINVAL;
824 minor = simple_strtoul(minorp+1, NULL, 0);
825 if (minor == 0)
826 return -EINVAL;
827 if (nfsd_minorversion(minor, sign == '-' ?
828 NFSD_CLEAR : NFSD_SET) < 0)
829 return -EINVAL;
830 goto next;
831 }
809 switch(num) { 832 switch(num) {
810 case 2: 833 case 2:
811 case 3: 834 case 3:
@@ -815,6 +838,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
815 default: 838 default:
816 return -EINVAL; 839 return -EINVAL;
817 } 840 }
841 next:
818 vers += len + 1; 842 vers += len + 1;
819 tlen += len; 843 tlen += len;
820 } while ((len = qword_get(&mesg, vers, size)) > 0); 844 } while ((len = qword_get(&mesg, vers, size)) > 0);
@@ -833,6 +857,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
833 num); 857 num);
834 sep = " "; 858 sep = " ";
835 } 859 }
860 if (nfsd_vers(4, NFSD_AVAIL))
861 for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++)
862 len += sprintf(buf+len, " %c4.%u",
863 (nfsd_vers(4, NFSD_TEST) &&
864 nfsd_minorversion(minor, NFSD_TEST)) ?
865 '+' : '-',
866 minor);
836 len += sprintf(buf+len, "\n"); 867 len += sprintf(buf+len, "\n");
837 return len; 868 return len;
838} 869}
@@ -1248,6 +1279,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1248 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, 1279 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
1249 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, 1280 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
1250 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, 1281 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
1282 [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
1251 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1283 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1252 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1284 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1253 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1285 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6f7f26351227..e298e260b5f1 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -180,6 +180,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
180{ 180{
181 __be32 nfserr; 181 __be32 nfserr;
182 int stable = 1; 182 int stable = 1;
183 unsigned long cnt = argp->len;
183 184
184 dprintk("nfsd: WRITE %s %d bytes at %d\n", 185 dprintk("nfsd: WRITE %s %d bytes at %d\n",
185 SVCFH_fmt(&argp->fh), 186 SVCFH_fmt(&argp->fh),
@@ -188,7 +189,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
188 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 189 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
189 argp->offset, 190 argp->offset,
190 rqstp->rq_vec, argp->vlen, 191 rqstp->rq_vec, argp->vlen,
191 argp->len, 192 &cnt,
192 &stable); 193 &stable);
193 return nfsd_return_attrs(nfserr, resp); 194 return nfsd_return_attrs(nfserr, resp);
194} 195}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 7c09852be713..cbba4a935786 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -22,6 +22,7 @@
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/fs_struct.h> 23#include <linux/fs_struct.h>
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <linux/swap.h>
25 26
26#include <linux/sunrpc/types.h> 27#include <linux/sunrpc/types.h>
27#include <linux/sunrpc/stats.h> 28#include <linux/sunrpc/stats.h>
@@ -40,9 +41,6 @@
40extern struct svc_program nfsd_program; 41extern struct svc_program nfsd_program;
41static int nfsd(void *vrqstp); 42static int nfsd(void *vrqstp);
42struct timeval nfssvc_boot; 43struct timeval nfssvc_boot;
43static atomic_t nfsd_busy;
44static unsigned long nfsd_last_call;
45static DEFINE_SPINLOCK(nfsd_call_lock);
46 44
47/* 45/*
48 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members 46 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
@@ -123,6 +121,8 @@ struct svc_program nfsd_program = {
123 121
124}; 122};
125 123
124u32 nfsd_supported_minorversion;
125
126int nfsd_vers(int vers, enum vers_op change) 126int nfsd_vers(int vers, enum vers_op change)
127{ 127{
128 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) 128 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
@@ -149,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change)
149 } 149 }
150 return 0; 150 return 0;
151} 151}
152
153int nfsd_minorversion(u32 minorversion, enum vers_op change)
154{
155 if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
156 return -1;
157 switch(change) {
158 case NFSD_SET:
159 nfsd_supported_minorversion = minorversion;
160 break;
161 case NFSD_CLEAR:
162 if (minorversion == 0)
163 return -1;
164 nfsd_supported_minorversion = minorversion - 1;
165 break;
166 case NFSD_TEST:
167 return minorversion <= nfsd_supported_minorversion;
168 case NFSD_AVAIL:
169 return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
170 }
171 return 0;
172}
173
152/* 174/*
153 * Maximum number of nfsd processes 175 * Maximum number of nfsd processes
154 */ 176 */
@@ -200,6 +222,28 @@ void nfsd_reset_versions(void)
200 } 222 }
201} 223}
202 224
225/*
226 * Each session guarantees a negotiated per slot memory cache for replies
227 * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
228 * NFSv4.1 server might want to use more memory for a DRC than a machine
229 * with multiple services.
230 *
231 * Impose a hard limit on the number of pages for the DRC which varies
232 * according to the machine's free pages. This is of course only a default.
233 *
234 * For now this is a #defined shift which could be under admin control
235 * in the future.
236 */
237static void set_max_drc(void)
238{
239 /* Share of nr_free_buffer_pages() given to the V4.1 server DRC: a right shift by this many bits (1/2^7 of the pages), not a percentage */
240 #define NFSD_DRC_SIZE_SHIFT 7
241 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
242 >> NFSD_DRC_SIZE_SHIFT;
 /* start with no DRC pages accounted for */
243 nfsd_serv->sv_drc_pages_used = 0;
244 dprintk("%s svc_drc_max_pages %u\n", __func__,
245 nfsd_serv->sv_drc_max_pages);
246}
203 247
204int nfsd_create_serv(void) 248int nfsd_create_serv(void)
205{ 249{
@@ -227,11 +271,12 @@ int nfsd_create_serv(void)
227 nfsd_max_blksize /= 2; 271 nfsd_max_blksize /= 2;
228 } 272 }
229 273
230 atomic_set(&nfsd_busy, 0);
231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 274 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
232 nfsd_last_thread, nfsd, THIS_MODULE); 275 nfsd_last_thread, nfsd, THIS_MODULE);
233 if (nfsd_serv == NULL) 276 if (nfsd_serv == NULL)
234 err = -ENOMEM; 277 err = -ENOMEM;
278 else
279 set_max_drc();
235 280
236 do_gettimeofday(&nfssvc_boot); /* record boot time */ 281 do_gettimeofday(&nfssvc_boot); /* record boot time */
237 return err; 282 return err;
@@ -375,26 +420,6 @@ nfsd_svc(unsigned short port, int nrservs)
375 return error; 420 return error;
376} 421}
377 422
378static inline void
379update_thread_usage(int busy_threads)
380{
381 unsigned long prev_call;
382 unsigned long diff;
383 int decile;
384
385 spin_lock(&nfsd_call_lock);
386 prev_call = nfsd_last_call;
387 nfsd_last_call = jiffies;
388 decile = busy_threads*10/nfsdstats.th_cnt;
389 if (decile>0 && decile <= 10) {
390 diff = nfsd_last_call - prev_call;
391 if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
392 nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
393 if (decile == 10)
394 nfsdstats.th_fullcnt++;
395 }
396 spin_unlock(&nfsd_call_lock);
397}
398 423
399/* 424/*
400 * This is the NFS server kernel thread 425 * This is the NFS server kernel thread
@@ -460,8 +485,6 @@ nfsd(void *vrqstp)
460 continue; 485 continue;
461 } 486 }
462 487
463 update_thread_usage(atomic_read(&nfsd_busy));
464 atomic_inc(&nfsd_busy);
465 488
466 /* Lock the export hash tables for reading. */ 489 /* Lock the export hash tables for reading. */
467 exp_readlock(); 490 exp_readlock();
@@ -470,8 +493,6 @@ nfsd(void *vrqstp)
470 493
471 /* Unlock export hash tables */ 494 /* Unlock export hash tables */
472 exp_readunlock(); 495 exp_readunlock();
473 update_thread_usage(atomic_read(&nfsd_busy));
474 atomic_dec(&nfsd_busy);
475 } 496 }
476 497
477 /* Clear signals before calling svc_exit_thread() */ 498 /* Clear signals before calling svc_exit_thread() */
@@ -539,6 +560,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
539 + rqstp->rq_res.head[0].iov_len; 560 + rqstp->rq_res.head[0].iov_len;
540 rqstp->rq_res.head[0].iov_len += sizeof(__be32); 561 rqstp->rq_res.head[0].iov_len += sizeof(__be32);
541 562
563 /* NFSv4.1 DRC requires statp */
564 if (rqstp->rq_vers == 4)
565 nfsd4_set_statp(rqstp, statp);
566
542 /* Now call the procedure handler, and encode NFS status. */ 567 /* Now call the procedure handler, and encode NFS status. */
543 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 568 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
544 nfserr = map_new_errors(rqstp->rq_vers, nfserr); 569 nfserr = map_new_errors(rqstp->rq_vers, nfserr);
@@ -570,3 +595,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
570 nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); 595 nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
571 return 1; 596 return 1;
572} 597}
598
599int nfsd_pool_stats_open(struct inode *inode, struct file *file)
600{
601 if (nfsd_serv == NULL)
602 return -ENODEV;
603 return svc_pool_stats_open(nfsd_serv, file);
604}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 78376b6c0236..ab93fcfef254 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -366,8 +366,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
366 } 366 }
367 367
368 /* Revoke setuid/setgid on chown */ 368 /* Revoke setuid/setgid on chown */
369 if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 369 if (!S_ISDIR(inode->i_mode) &&
370 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { 370 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
371 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
371 iap->ia_valid |= ATTR_KILL_PRIV; 372 iap->ia_valid |= ATTR_KILL_PRIV;
372 if (iap->ia_valid & ATTR_MODE) { 373 if (iap->ia_valid & ATTR_MODE) {
373 /* we're setting mode too, just clear the s*id bits */ 374 /* we're setting mode too, just clear the s*id bits */
@@ -960,7 +961,7 @@ static void kill_suid(struct dentry *dentry)
960static __be32 961static __be32
961nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 962nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
962 loff_t offset, struct kvec *vec, int vlen, 963 loff_t offset, struct kvec *vec, int vlen,
963 unsigned long cnt, int *stablep) 964 unsigned long *cnt, int *stablep)
964{ 965{
965 struct svc_export *exp; 966 struct svc_export *exp;
966 struct dentry *dentry; 967 struct dentry *dentry;
@@ -974,7 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
974 err = nfserr_perm; 975 err = nfserr_perm;
975 976
976 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 977 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
977 (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) 978 (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
978 goto out; 979 goto out;
979#endif 980#endif
980 981
@@ -1009,7 +1010,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1009 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1010 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1010 set_fs(oldfs); 1011 set_fs(oldfs);
1011 if (host_err >= 0) { 1012 if (host_err >= 0) {
1012 nfsdstats.io_write += cnt; 1013 nfsdstats.io_write += host_err;
1013 fsnotify_modify(file->f_path.dentry); 1014 fsnotify_modify(file->f_path.dentry);
1014 } 1015 }
1015 1016
@@ -1054,9 +1055,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1054 } 1055 }
1055 1056
1056 dprintk("nfsd: write complete host_err=%d\n", host_err); 1057 dprintk("nfsd: write complete host_err=%d\n", host_err);
1057 if (host_err >= 0) 1058 if (host_err >= 0) {
1058 err = 0; 1059 err = 0;
1059 else 1060 *cnt = host_err;
1061 } else
1060 err = nfserrno(host_err); 1062 err = nfserrno(host_err);
1061out: 1063out:
1062 return err; 1064 return err;
@@ -1098,7 +1100,7 @@ out:
1098 */ 1100 */
1099__be32 1101__be32
1100nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1102nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1101 loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, 1103 loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
1102 int *stablep) 1104 int *stablep)
1103{ 1105{
1104 __be32 err = 0; 1106 __be32 err = 0;
@@ -1179,6 +1181,21 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1179 return 0; 1181 return 0;
1180} 1182}
1181 1183
1184/* HPUX clients sometimes create a file with mode 000 and set its size to 0.
1185 * Setting the size to 0 may fail on some file systems because the permission
1186 * check requires WRITE permission, but the mode is 000.
1187 * We ignore the resize (to 0) on a newly created file, since its size is
1188 * already 0 after creation.
1189 *
1190 * Call this only after vfs_create() has been called.
1191 */
1192static void
1193nfsd_check_ignore_resizing(struct iattr *iap)
1194{
1195 if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
1196 iap->ia_valid &= ~ATTR_SIZE;
1197}
1198
1182/* 1199/*
1183 * Create a file (regular, directory, device, fifo); UNIX sockets 1200 * Create a file (regular, directory, device, fifo); UNIX sockets
1184 * not yet implemented. 1201 * not yet implemented.
@@ -1274,6 +1291,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1274 switch (type) { 1291 switch (type) {
1275 case S_IFREG: 1292 case S_IFREG:
1276 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1293 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1294 if (!host_err)
1295 nfsd_check_ignore_resizing(iap);
1277 break; 1296 break;
1278 case S_IFDIR: 1297 case S_IFDIR:
1279 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1298 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
@@ -1427,6 +1446,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1427 /* setattr will sync the child (or not) */ 1446 /* setattr will sync the child (or not) */
1428 } 1447 }
1429 1448
1449 nfsd_check_ignore_resizing(iap);
1450
1430 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1451 if (createmode == NFS3_CREATE_EXCLUSIVE) {
1431 /* Cram the verifier into atime/mtime */ 1452 /* Cram the verifier into atime/mtime */
1432 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1453 iap->ia_valid = ATTR_MTIME|ATTR_ATIME
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 7dc5b6cb44cd..d39ed1cc5fbf 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -25,13 +25,13 @@ struct svc_rqst;
25#define NLM_MAXCOOKIELEN 32 25#define NLM_MAXCOOKIELEN 32
26#define NLM_MAXSTRLEN 1024 26#define NLM_MAXSTRLEN 1024
27 27
28#define nlm_granted __constant_htonl(NLM_LCK_GRANTED) 28#define nlm_granted cpu_to_be32(NLM_LCK_GRANTED)
29#define nlm_lck_denied __constant_htonl(NLM_LCK_DENIED) 29#define nlm_lck_denied cpu_to_be32(NLM_LCK_DENIED)
30#define nlm_lck_denied_nolocks __constant_htonl(NLM_LCK_DENIED_NOLOCKS) 30#define nlm_lck_denied_nolocks cpu_to_be32(NLM_LCK_DENIED_NOLOCKS)
31#define nlm_lck_blocked __constant_htonl(NLM_LCK_BLOCKED) 31#define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED)
32#define nlm_lck_denied_grace_period __constant_htonl(NLM_LCK_DENIED_GRACE_PERIOD) 32#define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
33 33
34#define nlm_drop_reply __constant_htonl(30000) 34#define nlm_drop_reply cpu_to_be32(30000)
35 35
36/* Lock info passed via NLM */ 36/* Lock info passed via NLM */
37struct nlm_lock { 37struct nlm_lock {
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 12bfe09de2b1..7353821341ed 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -15,11 +15,11 @@
15#include <linux/lockd/xdr.h> 15#include <linux/lockd/xdr.h>
16 16
17/* error codes new to NLMv4 */ 17/* error codes new to NLMv4 */
18#define nlm4_deadlock __constant_htonl(NLM_DEADLCK) 18#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK)
19#define nlm4_rofs __constant_htonl(NLM_ROFS) 19#define nlm4_rofs cpu_to_be32(NLM_ROFS)
20#define nlm4_stale_fh __constant_htonl(NLM_STALE_FH) 20#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH)
21#define nlm4_fbig __constant_htonl(NLM_FBIG) 21#define nlm4_fbig cpu_to_be32(NLM_FBIG)
22#define nlm4_failed __constant_htonl(NLM_FAILED) 22#define nlm4_failed cpu_to_be32(NLM_FAILED)
23 23
24 24
25 25
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 54af92c1c70b..214d499718f7 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -109,7 +109,6 @@
109 NFSERR_FILE_OPEN = 10046, /* v4 */ 109 NFSERR_FILE_OPEN = 10046, /* v4 */
110 NFSERR_ADMIN_REVOKED = 10047, /* v4 */ 110 NFSERR_ADMIN_REVOKED = 10047, /* v4 */
111 NFSERR_CB_PATH_DOWN = 10048, /* v4 */ 111 NFSERR_CB_PATH_DOWN = 10048, /* v4 */
112 NFSERR_REPLAY_ME = 10049 /* v4 */
113}; 112};
114 113
115/* NFSv2 file types - beware, these are not the same in NFSv3 */ 114/* NFSv2 file types - beware, these are not the same in NFSv3 */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index b912311a56b1..e3f0cbcbd0db 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -21,6 +21,7 @@
21#define NFS4_FHSIZE 128 21#define NFS4_FHSIZE 128
22#define NFS4_MAXPATHLEN PATH_MAX 22#define NFS4_MAXPATHLEN PATH_MAX
23#define NFS4_MAXNAMLEN NAME_MAX 23#define NFS4_MAXNAMLEN NAME_MAX
24#define NFS4_MAX_SESSIONID_LEN 16
24 25
25#define NFS4_ACCESS_READ 0x0001 26#define NFS4_ACCESS_READ 0x0001
26#define NFS4_ACCESS_LOOKUP 0x0002 27#define NFS4_ACCESS_LOOKUP 0x0002
@@ -38,6 +39,7 @@
38#define NFS4_OPEN_RESULT_CONFIRM 0x0002 39#define NFS4_OPEN_RESULT_CONFIRM 0x0002
39#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 40#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
40 41
42#define NFS4_SHARE_ACCESS_MASK 0x000F
41#define NFS4_SHARE_ACCESS_READ 0x0001 43#define NFS4_SHARE_ACCESS_READ 0x0001
42#define NFS4_SHARE_ACCESS_WRITE 0x0002 44#define NFS4_SHARE_ACCESS_WRITE 0x0002
43#define NFS4_SHARE_ACCESS_BOTH 0x0003 45#define NFS4_SHARE_ACCESS_BOTH 0x0003
@@ -45,6 +47,19 @@
45#define NFS4_SHARE_DENY_WRITE 0x0002 47#define NFS4_SHARE_DENY_WRITE 0x0002
46#define NFS4_SHARE_DENY_BOTH 0x0003 48#define NFS4_SHARE_DENY_BOTH 0x0003
47 49
50/* nfs41 */
51#define NFS4_SHARE_WANT_MASK 0xFF00
52#define NFS4_SHARE_WANT_NO_PREFERENCE 0x0000
53#define NFS4_SHARE_WANT_READ_DELEG 0x0100
54#define NFS4_SHARE_WANT_WRITE_DELEG 0x0200
55#define NFS4_SHARE_WANT_ANY_DELEG 0x0300
56#define NFS4_SHARE_WANT_NO_DELEG 0x0400
57#define NFS4_SHARE_WANT_CANCEL 0x0500
58
59#define NFS4_SHARE_WHEN_MASK 0xF0000
60#define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000
61#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000
62
48#define NFS4_SET_TO_SERVER_TIME 0 63#define NFS4_SET_TO_SERVER_TIME 0
49#define NFS4_SET_TO_CLIENT_TIME 1 64#define NFS4_SET_TO_CLIENT_TIME 1
50 65
@@ -88,6 +103,31 @@
88#define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 103#define NFS4_ACE_GENERIC_EXECUTE 0x001200A0
89#define NFS4_ACE_MASK_ALL 0x001F01FF 104#define NFS4_ACE_MASK_ALL 0x001F01FF
90 105
106#define EXCHGID4_FLAG_SUPP_MOVED_REFER 0x00000001
107#define EXCHGID4_FLAG_SUPP_MOVED_MIGR 0x00000002
108#define EXCHGID4_FLAG_USE_NON_PNFS 0x00010000
109#define EXCHGID4_FLAG_USE_PNFS_MDS 0x00020000
110#define EXCHGID4_FLAG_USE_PNFS_DS 0x00040000
111#define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000
112#define EXCHGID4_FLAG_CONFIRMED_R 0x80000000
113/*
114 * Since the validity of these bits depends on whether
115 * they're set in the argument or response, keep separate
116 * masks of the valid flag bits for arg (_A) and resp (_R).
117 */
118#define EXCHGID4_FLAG_MASK_A 0x40070003
119#define EXCHGID4_FLAG_MASK_R 0x80070003
120
121#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001
122#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002
123#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED 0x00000004
124#define SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED 0x00000008
125#define SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED 0x00000010
126#define SEQ4_STATUS_ADMIN_STATE_REVOKED 0x00000020
127#define SEQ4_STATUS_RECALLABLE_STATE_REVOKED 0x00000040
128#define SEQ4_STATUS_LEASE_MOVED 0x00000080
129#define SEQ4_STATUS_RESTART_RECLAIM_NEEDED 0x00000100
130
91#define NFS4_MAX_UINT64 (~(u64)0) 131#define NFS4_MAX_UINT64 (~(u64)0)
92 132
93enum nfs4_acl_whotype { 133enum nfs4_acl_whotype {
@@ -154,6 +194,28 @@ enum nfs_opnum4 {
154 OP_VERIFY = 37, 194 OP_VERIFY = 37,
155 OP_WRITE = 38, 195 OP_WRITE = 38,
156 OP_RELEASE_LOCKOWNER = 39, 196 OP_RELEASE_LOCKOWNER = 39,
197
198 /* nfs41 */
199 OP_BACKCHANNEL_CTL = 40,
200 OP_BIND_CONN_TO_SESSION = 41,
201 OP_EXCHANGE_ID = 42,
202 OP_CREATE_SESSION = 43,
203 OP_DESTROY_SESSION = 44,
204 OP_FREE_STATEID = 45,
205 OP_GET_DIR_DELEGATION = 46,
206 OP_GETDEVICEINFO = 47,
207 OP_GETDEVICELIST = 48,
208 OP_LAYOUTCOMMIT = 49,
209 OP_LAYOUTGET = 50,
210 OP_LAYOUTRETURN = 51,
211 OP_SECINFO_NO_NAME = 52,
212 OP_SEQUENCE = 53,
213 OP_SET_SSV = 54,
214 OP_TEST_STATEID = 55,
215 OP_WANT_DELEGATION = 56,
216 OP_DESTROY_CLIENTID = 57,
217 OP_RECLAIM_COMPLETE = 58,
218
157 OP_ILLEGAL = 10044, 219 OP_ILLEGAL = 10044,
158}; 220};
159 221
@@ -230,7 +292,48 @@ enum nfsstat4 {
230 NFS4ERR_DEADLOCK = 10045, 292 NFS4ERR_DEADLOCK = 10045,
231 NFS4ERR_FILE_OPEN = 10046, 293 NFS4ERR_FILE_OPEN = 10046,
232 NFS4ERR_ADMIN_REVOKED = 10047, 294 NFS4ERR_ADMIN_REVOKED = 10047,
233 NFS4ERR_CB_PATH_DOWN = 10048 295 NFS4ERR_CB_PATH_DOWN = 10048,
296
297 /* nfs41 */
298 NFS4ERR_BADIOMODE = 10049,
299 NFS4ERR_BADLAYOUT = 10050,
300 NFS4ERR_BAD_SESSION_DIGEST = 10051,
301 NFS4ERR_BADSESSION = 10052,
302 NFS4ERR_BADSLOT = 10053,
303 NFS4ERR_COMPLETE_ALREADY = 10054,
304 NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
305 NFS4ERR_DELEG_ALREADY_WANTED = 10056,
306 NFS4ERR_BACK_CHAN_BUSY = 10057, /* backchan reqs outstanding */
307 NFS4ERR_LAYOUTTRYLATER = 10058,
308 NFS4ERR_LAYOUTUNAVAILABLE = 10059,
309 NFS4ERR_NOMATCHING_LAYOUT = 10060,
310 NFS4ERR_RECALLCONFLICT = 10061,
311 NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
312 NFS4ERR_SEQ_MISORDERED = 10063, /* unexpected seq.id in req */
313 NFS4ERR_SEQUENCE_POS = 10064, /* [CB_]SEQ. op not 1st op */
314 NFS4ERR_REQ_TOO_BIG = 10065, /* request too big */
315 NFS4ERR_REP_TOO_BIG = 10066, /* reply too big */
316 NFS4ERR_REP_TOO_BIG_TO_CACHE = 10067, /* rep. not all cached */
317 NFS4ERR_RETRY_UNCACHED_REP = 10068, /* retry & rep. uncached */
318 NFS4ERR_UNSAFE_COMPOUND = 10069, /* retry/recovery too hard */
319 NFS4ERR_TOO_MANY_OPS = 10070, /* too many ops in [CB_]COMP */
320 NFS4ERR_OP_NOT_IN_SESSION = 10071, /* op needs [CB_]SEQ. op */
321 NFS4ERR_HASH_ALG_UNSUPP = 10072, /* hash alg. not supp. */
322 /* Error 10073 is unused. */
323 NFS4ERR_CLIENTID_BUSY = 10074, /* clientid has state */
324 NFS4ERR_PNFS_IO_HOLE = 10075, /* IO to _SPARSE file hole */
325 NFS4ERR_SEQ_FALSE_RETRY = 10076, /* retry not original */
326 NFS4ERR_BAD_HIGH_SLOT = 10077, /* sequence arg bad */
327 NFS4ERR_DEADSESSION = 10078, /* persistent session dead */
328 NFS4ERR_ENCR_ALG_UNSUPP = 10079, /* SSV alg mismatch */
329 NFS4ERR_PNFS_NO_LAYOUT = 10080, /* direct I/O with no layout */
330 NFS4ERR_NOT_ONLY_OP = 10081, /* bad compound */
331 NFS4ERR_WRONG_CRED = 10082, /* permissions:state change */
332 NFS4ERR_WRONG_TYPE = 10083, /* current operation mismatch */
333 NFS4ERR_DIRDELEG_UNAVAIL = 10084, /* no directory delegation */
334 NFS4ERR_REJECT_DELEG = 10085, /* on callback */
335 NFS4ERR_RETURNCONFLICT = 10086, /* outstanding layoutreturn */
336 NFS4ERR_DELEG_REVOKED = 10087, /* deleg./layout revoked */
234}; 337};
235 338
236/* 339/*
@@ -265,7 +368,13 @@ enum opentype4 {
265enum createmode4 { 368enum createmode4 {
266 NFS4_CREATE_UNCHECKED = 0, 369 NFS4_CREATE_UNCHECKED = 0,
267 NFS4_CREATE_GUARDED = 1, 370 NFS4_CREATE_GUARDED = 1,
268 NFS4_CREATE_EXCLUSIVE = 2 371 NFS4_CREATE_EXCLUSIVE = 2,
372 /*
373 * New to NFSv4.1. If session is persistent,
374 * GUARDED4 MUST be used. Otherwise, use
375 * EXCLUSIVE4_1 instead of EXCLUSIVE4.
376 */
377 NFS4_CREATE_EXCLUSIVE4_1 = 3
269}; 378};
270 379
271enum limit_by4 { 380enum limit_by4 {
@@ -301,6 +410,8 @@ enum lock_type4 {
301#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9) 410#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9)
302#define FATTR4_WORD0_LEASE_TIME (1UL << 10) 411#define FATTR4_WORD0_LEASE_TIME (1UL << 10)
303#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11) 412#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11)
413/* Mandatory in NFSv4.1 */
414#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11)
304 415
305/* Recommended Attributes */ 416/* Recommended Attributes */
306#define FATTR4_WORD0_ACL (1UL << 12) 417#define FATTR4_WORD0_ACL (1UL << 12)
@@ -391,6 +502,29 @@ enum {
391 NFSPROC4_CLNT_GETACL, 502 NFSPROC4_CLNT_GETACL,
392 NFSPROC4_CLNT_SETACL, 503 NFSPROC4_CLNT_SETACL,
393 NFSPROC4_CLNT_FS_LOCATIONS, 504 NFSPROC4_CLNT_FS_LOCATIONS,
505
506 /* nfs41 */
507 NFSPROC4_CLNT_EXCHANGE_ID,
508 NFSPROC4_CLNT_CREATE_SESSION,
509 NFSPROC4_CLNT_DESTROY_SESSION,
510 NFSPROC4_CLNT_SEQUENCE,
511 NFSPROC4_CLNT_GET_LEASE_TIME,
512};
513
514/* nfs41 types */
515struct nfs4_sessionid {
516 unsigned char data[NFS4_MAX_SESSIONID_LEN];
517};
518
519/* Create Session Flags */
520#define SESSION4_PERSIST 0x001
521#define SESSION4_BACK_CHAN 0x002
522#define SESSION4_RDMA 0x004
523
524enum state_protect_how4 {
525 SP4_NONE = 0,
526 SP4_MACH_CRED = 1,
527 SP4_SSV = 2
394}; 528};
395 529
396#endif 530#endif
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
index 04b355c801d8..5bccaab81056 100644
--- a/include/linux/nfsd/cache.h
+++ b/include/linux/nfsd/cache.h
@@ -76,4 +76,12 @@ void nfsd_reply_cache_shutdown(void);
76int nfsd_cache_lookup(struct svc_rqst *, int); 76int nfsd_cache_lookup(struct svc_rqst *, int);
77void nfsd_cache_update(struct svc_rqst *, int, __be32 *); 77void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
78 78
79#ifdef CONFIG_NFSD_V4
80void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
81#else /* CONFIG_NFSD_V4 */
82static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
83{
84}
85#endif /* CONFIG_NFSD_V4 */
86
79#endif /* NFSCACHE_H */ 87#endif /* NFSCACHE_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index e19f45991b2e..2b49d676d0c9 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -23,7 +23,7 @@
23/* 23/*
24 * nfsd version 24 * nfsd version
25 */ 25 */
26#define NFSD_SUPPORTED_MINOR_VERSION 0 26#define NFSD_SUPPORTED_MINOR_VERSION 1
27 27
28/* 28/*
29 * Flags for nfsd_permission 29 * Flags for nfsd_permission
@@ -53,6 +53,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
53extern struct svc_program nfsd_program; 53extern struct svc_program nfsd_program;
54extern struct svc_version nfsd_version2, nfsd_version3, 54extern struct svc_version nfsd_version2, nfsd_version3,
55 nfsd_version4; 55 nfsd_version4;
56extern u32 nfsd_supported_minorversion;
56extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
57extern struct svc_serv *nfsd_serv; 58extern struct svc_serv *nfsd_serv;
58 59
@@ -105,7 +106,7 @@ void nfsd_close(struct file *);
105__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, 106__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
106 loff_t, struct kvec *, int, unsigned long *); 107 loff_t, struct kvec *, int, unsigned long *);
107__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, 108__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
108 loff_t, struct kvec *,int, unsigned long, int *); 109 loff_t, struct kvec *,int, unsigned long *, int *);
109__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, 110__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
110 char *, int *); 111 char *, int *);
111__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, 112__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -149,6 +150,7 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
149 150
150enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; 151enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
151int nfsd_vers(int vers, enum vers_op change); 152int nfsd_vers(int vers, enum vers_op change);
153int nfsd_minorversion(u32 minorversion, enum vers_op change);
152void nfsd_reset_versions(void); 154void nfsd_reset_versions(void);
153int nfsd_create_serv(void); 155int nfsd_create_serv(void);
154 156
@@ -186,78 +188,119 @@ void nfsd_lockd_shutdown(void);
186/* 188/*
187 * These macros provide pre-xdr'ed values for faster operation. 189 * These macros provide pre-xdr'ed values for faster operation.
188 */ 190 */
189#define nfs_ok __constant_htonl(NFS_OK) 191#define nfs_ok cpu_to_be32(NFS_OK)
190#define nfserr_perm __constant_htonl(NFSERR_PERM) 192#define nfserr_perm cpu_to_be32(NFSERR_PERM)
191#define nfserr_noent __constant_htonl(NFSERR_NOENT) 193#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
192#define nfserr_io __constant_htonl(NFSERR_IO) 194#define nfserr_io cpu_to_be32(NFSERR_IO)
193#define nfserr_nxio __constant_htonl(NFSERR_NXIO) 195#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
194#define nfserr_eagain __constant_htonl(NFSERR_EAGAIN) 196#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
195#define nfserr_acces __constant_htonl(NFSERR_ACCES) 197#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
196#define nfserr_exist __constant_htonl(NFSERR_EXIST) 198#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
197#define nfserr_xdev __constant_htonl(NFSERR_XDEV) 199#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
198#define nfserr_nodev __constant_htonl(NFSERR_NODEV) 200#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
199#define nfserr_notdir __constant_htonl(NFSERR_NOTDIR) 201#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
200#define nfserr_isdir __constant_htonl(NFSERR_ISDIR) 202#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
201#define nfserr_inval __constant_htonl(NFSERR_INVAL) 203#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
202#define nfserr_fbig __constant_htonl(NFSERR_FBIG) 204#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
203#define nfserr_nospc __constant_htonl(NFSERR_NOSPC) 205#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
204#define nfserr_rofs __constant_htonl(NFSERR_ROFS) 206#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
205#define nfserr_mlink __constant_htonl(NFSERR_MLINK) 207#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
206#define nfserr_opnotsupp __constant_htonl(NFSERR_OPNOTSUPP) 208#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
207#define nfserr_nametoolong __constant_htonl(NFSERR_NAMETOOLONG) 209#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
208#define nfserr_notempty __constant_htonl(NFSERR_NOTEMPTY) 210#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
209#define nfserr_dquot __constant_htonl(NFSERR_DQUOT) 211#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
210#define nfserr_stale __constant_htonl(NFSERR_STALE) 212#define nfserr_stale cpu_to_be32(NFSERR_STALE)
211#define nfserr_remote __constant_htonl(NFSERR_REMOTE) 213#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
212#define nfserr_wflush __constant_htonl(NFSERR_WFLUSH) 214#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
213#define nfserr_badhandle __constant_htonl(NFSERR_BADHANDLE) 215#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
214#define nfserr_notsync __constant_htonl(NFSERR_NOT_SYNC) 216#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
215#define nfserr_badcookie __constant_htonl(NFSERR_BAD_COOKIE) 217#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
216#define nfserr_notsupp __constant_htonl(NFSERR_NOTSUPP) 218#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
217#define nfserr_toosmall __constant_htonl(NFSERR_TOOSMALL) 219#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
218#define nfserr_serverfault __constant_htonl(NFSERR_SERVERFAULT) 220#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
219#define nfserr_badtype __constant_htonl(NFSERR_BADTYPE) 221#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
220#define nfserr_jukebox __constant_htonl(NFSERR_JUKEBOX) 222#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
221#define nfserr_denied __constant_htonl(NFSERR_DENIED) 223#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
222#define nfserr_deadlock __constant_htonl(NFSERR_DEADLOCK) 224#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
223#define nfserr_expired __constant_htonl(NFSERR_EXPIRED) 225#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
224#define nfserr_bad_cookie __constant_htonl(NFSERR_BAD_COOKIE) 226#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
225#define nfserr_same __constant_htonl(NFSERR_SAME) 227#define nfserr_same cpu_to_be32(NFSERR_SAME)
226#define nfserr_clid_inuse __constant_htonl(NFSERR_CLID_INUSE) 228#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
227#define nfserr_stale_clientid __constant_htonl(NFSERR_STALE_CLIENTID) 229#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
228#define nfserr_resource __constant_htonl(NFSERR_RESOURCE) 230#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
229#define nfserr_moved __constant_htonl(NFSERR_MOVED) 231#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
230#define nfserr_nofilehandle __constant_htonl(NFSERR_NOFILEHANDLE) 232#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
231#define nfserr_minor_vers_mismatch __constant_htonl(NFSERR_MINOR_VERS_MISMATCH) 233#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
232#define nfserr_share_denied __constant_htonl(NFSERR_SHARE_DENIED) 234#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
233#define nfserr_stale_stateid __constant_htonl(NFSERR_STALE_STATEID) 235#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
234#define nfserr_old_stateid __constant_htonl(NFSERR_OLD_STATEID) 236#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
235#define nfserr_bad_stateid __constant_htonl(NFSERR_BAD_STATEID) 237#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
236#define nfserr_bad_seqid __constant_htonl(NFSERR_BAD_SEQID) 238#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
237#define nfserr_symlink __constant_htonl(NFSERR_SYMLINK) 239#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
238#define nfserr_not_same __constant_htonl(NFSERR_NOT_SAME) 240#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
239#define nfserr_restorefh __constant_htonl(NFSERR_RESTOREFH) 241#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
240#define nfserr_attrnotsupp __constant_htonl(NFSERR_ATTRNOTSUPP) 242#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
241#define nfserr_bad_xdr __constant_htonl(NFSERR_BAD_XDR) 243#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
242#define nfserr_openmode __constant_htonl(NFSERR_OPENMODE) 244#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
243#define nfserr_locks_held __constant_htonl(NFSERR_LOCKS_HELD) 245#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
244#define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL) 246#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
245#define nfserr_grace __constant_htonl(NFSERR_GRACE) 247#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
246#define nfserr_no_grace __constant_htonl(NFSERR_NO_GRACE) 248#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
247#define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) 249#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
248#define nfserr_badname __constant_htonl(NFSERR_BADNAME) 250#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
249#define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) 251#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
250#define nfserr_locked __constant_htonl(NFSERR_LOCKED) 252#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
251#define nfserr_wrongsec __constant_htonl(NFSERR_WRONGSEC) 253#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
252#define nfserr_replay_me __constant_htonl(NFSERR_REPLAY_ME) 254#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
255#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
256#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
257#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
258#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
259#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
260#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
261#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
262#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
263#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
264#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
265#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
266#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
267#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
268#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
269#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
270#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
271#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
272#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
273#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
274#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
275#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
276#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
277#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
278#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
279#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
280#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
281#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
282#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
283#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
284#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
285#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
286#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
287#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
288#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
289#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
290#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
291#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
253 292
254/* error codes for internal use */ 293/* error codes for internal use */
255/* if a request fails due to kmalloc failure, it gets dropped. 294/* if a request fails due to kmalloc failure, it gets dropped.
256 * Client should resend eventually 295 * Client should resend eventually
257 */ 296 */
258#define nfserr_dropit __constant_htonl(30000) 297#define nfserr_dropit cpu_to_be32(30000)
259/* end-of-file indicator in readdir */ 298/* end-of-file indicator in readdir */
260#define nfserr_eof __constant_htonl(30001) 299#define nfserr_eof cpu_to_be32(30001)
300/* replay detected */
301#define nfserr_replay_me cpu_to_be32(11001)
302/* nfs41 replay detected */
303#define nfserr_replay_cache cpu_to_be32(11002)
261 304
262/* Check for dir entries '.' and '..' */ 305/* Check for dir entries '.' and '..' */
263#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) 306#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
@@ -300,7 +343,7 @@ extern struct timeval nfssvc_boot;
300 * TIME_BACKUP (unlikely to be supported any time soon) 343 * TIME_BACKUP (unlikely to be supported any time soon)
301 * TIME_CREATE (unlikely to be supported any time soon) 344 * TIME_CREATE (unlikely to be supported any time soon)
302 */ 345 */
303#define NFSD_SUPPORTED_ATTRS_WORD0 \ 346#define NFSD4_SUPPORTED_ATTRS_WORD0 \
304(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ 347(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
305 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ 348 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
306 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ 349 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
@@ -312,7 +355,7 @@ extern struct timeval nfssvc_boot;
312 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ 355 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
313 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) 356 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
314 357
315#define NFSD_SUPPORTED_ATTRS_WORD1 \ 358#define NFSD4_SUPPORTED_ATTRS_WORD1 \
316(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ 359(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
317 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ 360 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
318 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ 361 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
@@ -320,6 +363,35 @@ extern struct timeval nfssvc_boot;
320 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ 363 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
321 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) 364 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
322 365
366#define NFSD4_SUPPORTED_ATTRS_WORD2 0
367
368#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
369 NFSD4_SUPPORTED_ATTRS_WORD0
370
371#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
372 NFSD4_SUPPORTED_ATTRS_WORD1
373
374#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
375 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
376
377static inline u32 nfsd_suppattrs0(u32 minorversion)
378{
379 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
380 : NFSD4_SUPPORTED_ATTRS_WORD0;
381}
382
383static inline u32 nfsd_suppattrs1(u32 minorversion)
384{
385 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
386 : NFSD4_SUPPORTED_ATTRS_WORD1;
387}
388
389static inline u32 nfsd_suppattrs2(u32 minorversion)
390{
391 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
392 : NFSD4_SUPPORTED_ATTRS_WORD2;
393}
394
323/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ 395/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
324#define NFSD_WRITEONLY_ATTRS_WORD1 \ 396#define NFSD_WRITEONLY_ATTRS_WORD1 \
325(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) 397(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
@@ -330,6 +402,19 @@ extern struct timeval nfssvc_boot;
330#define NFSD_WRITEABLE_ATTRS_WORD1 \ 402#define NFSD_WRITEABLE_ATTRS_WORD1 \
331(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ 403(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
332 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) 404 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
405#define NFSD_WRITEABLE_ATTRS_WORD2 0
406
407#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
408 NFSD_WRITEABLE_ATTRS_WORD0
409/*
410 * we currently store the exclusive create verifier in the v_{a,m}time
411 * attributes so the client can't set these at create time using EXCLUSIVE4_1
412 */
413#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
414 (NFSD_WRITEABLE_ATTRS_WORD1 & \
415 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
416#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
417 NFSD_WRITEABLE_ATTRS_WORD2
333 418
334#endif /* CONFIG_NFSD_V4 */ 419#endif /* CONFIG_NFSD_V4 */
335 420
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index fa317f6c154b..afa19016c4a8 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -269,6 +269,13 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src)
269 return dst; 269 return dst;
270} 270}
271 271
272static inline void
273fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
274{
275 dst->fh_size = src->fh_size;
276 memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
277}
278
272static __inline__ struct svc_fh * 279static __inline__ struct svc_fh *
273fh_init(struct svc_fh *fhp, int maxsize) 280fh_init(struct svc_fh *fhp, int maxsize)
274{ 281{
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 128298c0362d..4d61c873feed 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -66,8 +66,7 @@ struct nfs4_cb_recall {
66 u32 cbr_ident; 66 u32 cbr_ident;
67 int cbr_trunc; 67 int cbr_trunc;
68 stateid_t cbr_stateid; 68 stateid_t cbr_stateid;
69 u32 cbr_fhlen; 69 struct knfsd_fh cbr_fh;
70 char cbr_fhval[NFS4_FHSIZE];
71 struct nfs4_delegation *cbr_dp; 70 struct nfs4_delegation *cbr_dp;
72}; 71};
73 72
@@ -86,8 +85,7 @@ struct nfs4_delegation {
86}; 85};
87 86
88#define dl_stateid dl_recall.cbr_stateid 87#define dl_stateid dl_recall.cbr_stateid
89#define dl_fhlen dl_recall.cbr_fhlen 88#define dl_fh dl_recall.cbr_fh
90#define dl_fhval dl_recall.cbr_fhval
91 89
92/* client delegation callback info */ 90/* client delegation callback info */
93struct nfs4_callback { 91struct nfs4_callback {
@@ -101,6 +99,64 @@ struct nfs4_callback {
101 struct rpc_clnt * cb_client; 99 struct rpc_clnt * cb_client;
102}; 100};
103 101
102/* Maximum number of slots per session. 128 is useful for long haul TCP */
103#define NFSD_MAX_SLOTS_PER_SESSION 128
104/* Maximum number of pages per slot cache entry */
105#define NFSD_PAGES_PER_SLOT 1
106/* Maximum number of operations per session compound */
107#define NFSD_MAX_OPS_PER_COMPOUND 16
108
109struct nfsd4_cache_entry {
110 __be32 ce_status;
111 struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */
112 struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1];
113 int ce_cachethis;
114 short ce_resused;
115 int ce_opcnt;
116 int ce_rpchdrlen;
117};
118
119struct nfsd4_slot {
120 bool sl_inuse;
121 u32 sl_seqid;
122 struct nfsd4_cache_entry sl_cache_entry;
123};
124
125struct nfsd4_session {
126 struct kref se_ref;
127 struct list_head se_hash; /* hash by sessionid */
128 struct list_head se_perclnt;
129 u32 se_flags;
130 struct nfs4_client *se_client; /* for expire_client */
131 struct nfs4_sessionid se_sessionid;
132 u32 se_fmaxreq_sz;
133 u32 se_fmaxresp_sz;
134 u32 se_fmaxresp_cached;
135 u32 se_fmaxops;
136 u32 se_fnumslots;
137 struct nfsd4_slot se_slots[]; /* forward channel slots */
138};
139
140static inline void
141nfsd4_put_session(struct nfsd4_session *ses)
142{
143 extern void free_session(struct kref *kref);
144 kref_put(&ses->se_ref, free_session);
145}
146
147static inline void
148nfsd4_get_session(struct nfsd4_session *ses)
149{
150 kref_get(&ses->se_ref);
151}
152
153/* formatted contents of nfs4_sessionid */
154struct nfsd4_sessionid {
155 clientid_t clientid;
156 u32 sequence;
157 u32 reserved;
158};
159
104#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ 160#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
105 161
106/* 162/*
@@ -132,6 +188,12 @@ struct nfs4_client {
132 struct nfs4_callback cl_callback; /* callback info */ 188 struct nfs4_callback cl_callback; /* callback info */
133 atomic_t cl_count; /* ref count */ 189 atomic_t cl_count; /* ref count */
134 u32 cl_firststate; /* recovery dir creation */ 190 u32 cl_firststate; /* recovery dir creation */
191
192 /* for nfs41 */
193 struct list_head cl_sessions;
194 struct nfsd4_slot cl_slot; /* create_session slot */
195 u32 cl_exchange_flags;
196 struct nfs4_sessionid cl_sessionid;
135}; 197};
136 198
137/* struct nfs4_client_reset 199/* struct nfs4_client_reset
@@ -168,8 +230,7 @@ struct nfs4_replay {
168 unsigned int rp_buflen; 230 unsigned int rp_buflen;
169 char *rp_buf; 231 char *rp_buf;
170 unsigned intrp_allocated; 232 unsigned intrp_allocated;
171 int rp_openfh_len; 233 struct knfsd_fh rp_openfh;
172 char rp_openfh[NFS4_FHSIZE];
173 char rp_ibuf[NFSD4_REPLAY_ISIZE]; 234 char rp_ibuf[NFSD4_REPLAY_ISIZE];
174}; 235};
175 236
@@ -217,7 +278,7 @@ struct nfs4_stateowner {
217* share_access, share_deny on the file. 278* share_access, share_deny on the file.
218*/ 279*/
219struct nfs4_file { 280struct nfs4_file {
220 struct kref fi_ref; 281 atomic_t fi_ref;
221 struct list_head fi_hash; /* hash by "struct inode *" */ 282 struct list_head fi_hash; /* hash by "struct inode *" */
222 struct list_head fi_stateids; 283 struct list_head fi_stateids;
223 struct list_head fi_delegations; 284 struct list_head fi_delegations;
@@ -259,14 +320,13 @@ struct nfs4_stateid {
259}; 320};
260 321
261/* flags for preprocess_seqid_op() */ 322/* flags for preprocess_seqid_op() */
262#define CHECK_FH 0x00000001 323#define HAS_SESSION 0x00000001
263#define CONFIRM 0x00000002 324#define CONFIRM 0x00000002
264#define OPEN_STATE 0x00000004 325#define OPEN_STATE 0x00000004
265#define LOCK_STATE 0x00000008 326#define LOCK_STATE 0x00000008
266#define RD_STATE 0x00000010 327#define RD_STATE 0x00000010
267#define WR_STATE 0x00000020 328#define WR_STATE 0x00000020
268#define CLOSE_STATE 0x00000040 329#define CLOSE_STATE 0x00000040
269#define DELEG_RET 0x00000080
270 330
271#define seqid_mutating_err(err) \ 331#define seqid_mutating_err(err) \
272 (((err) != nfserr_stale_clientid) && \ 332 (((err) != nfserr_stale_clientid) && \
@@ -274,7 +334,9 @@ struct nfs4_stateid {
274 ((err) != nfserr_stale_stateid) && \ 334 ((err) != nfserr_stale_stateid) && \
275 ((err) != nfserr_bad_stateid)) 335 ((err) != nfserr_bad_stateid))
276 336
277extern __be32 nfs4_preprocess_stateid_op(struct svc_fh *current_fh, 337struct nfsd4_compound_state;
338
339extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
278 stateid_t *stateid, int flags, struct file **filp); 340 stateid_t *stateid, int flags, struct file **filp);
279extern void nfs4_lock_state(void); 341extern void nfs4_lock_state(void);
280extern void nfs4_unlock_state(void); 342extern void nfs4_unlock_state(void);
@@ -290,7 +352,7 @@ extern void nfsd4_init_recdir(char *recdir_name);
290extern int nfsd4_recdir_load(void); 352extern int nfsd4_recdir_load(void);
291extern void nfsd4_shutdown_recdir(void); 353extern void nfsd4_shutdown_recdir(void);
292extern int nfs4_client_to_reclaim(const char *name); 354extern int nfs4_client_to_reclaim(const char *name);
293extern int nfs4_has_reclaimed_state(const char *name); 355extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
294extern void nfsd4_recdir_purge_old(void); 356extern void nfsd4_recdir_purge_old(void);
295extern int nfsd4_create_clid_dir(struct nfs4_client *clp); 357extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
296extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); 358extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h
index 7678cfbe9960..2693ef647df6 100644
--- a/include/linux/nfsd/stats.h
+++ b/include/linux/nfsd/stats.h
@@ -11,6 +11,11 @@
11 11
12#include <linux/nfs4.h> 12#include <linux/nfs4.h>
13 13
14/* thread usage wraps every million seconds (approx one fortnight) */
15#define NFSD_USAGE_WRAP (HZ*1000000)
16
17#ifdef __KERNEL__
18
14struct nfsd_stats { 19struct nfsd_stats {
15 unsigned int rchits; /* repcache hits */ 20 unsigned int rchits; /* repcache hits */
16 unsigned int rcmisses; /* repcache misses */ 21 unsigned int rcmisses; /* repcache misses */
@@ -35,10 +40,6 @@ struct nfsd_stats {
35 40
36}; 41};
37 42
38/* thread usage wraps every million seconds (approx one fortnight) */
39#define NFSD_USAGE_WRAP (HZ*1000000)
40
41#ifdef __KERNEL__
42 43
43extern struct nfsd_stats nfsdstats; 44extern struct nfsd_stats nfsdstats;
44extern struct svc_stat nfsd_svcstats; 45extern struct svc_stat nfsd_svcstats;
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 27bd3e38ec5a..f80d6013fdc3 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -45,10 +45,22 @@
45#define XDR_LEN(n) (((n) + 3) & ~3) 45#define XDR_LEN(n) (((n) + 3) & ~3)
46 46
47struct nfsd4_compound_state { 47struct nfsd4_compound_state {
48 struct svc_fh current_fh; 48 struct svc_fh current_fh;
49 struct svc_fh save_fh; 49 struct svc_fh save_fh;
50 struct nfs4_stateowner *replay_owner; 50 struct nfs4_stateowner *replay_owner;
51}; 51 /* For sessions DRC */
52 struct nfsd4_session *session;
53 struct nfsd4_slot *slot;
54 __be32 *statp;
55 size_t iovlen;
56 u32 minorversion;
57 u32 status;
58};
59
60static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
61{
62 return cs->slot != NULL;
63}
52 64
53struct nfsd4_change_info { 65struct nfsd4_change_info {
54 u32 atomic; 66 u32 atomic;
@@ -90,7 +102,7 @@ struct nfsd4_create {
90 u32 specdata2; 102 u32 specdata2;
91 } dev; /* NF4BLK, NF4CHR */ 103 } dev; /* NF4BLK, NF4CHR */
92 } u; 104 } u;
93 u32 cr_bmval[2]; /* request */ 105 u32 cr_bmval[3]; /* request */
94 struct iattr cr_iattr; /* request */ 106 struct iattr cr_iattr; /* request */
95 struct nfsd4_change_info cr_cinfo; /* response */ 107 struct nfsd4_change_info cr_cinfo; /* response */
96 struct nfs4_acl *cr_acl; 108 struct nfs4_acl *cr_acl;
@@ -105,7 +117,7 @@ struct nfsd4_delegreturn {
105}; 117};
106 118
107struct nfsd4_getattr { 119struct nfsd4_getattr {
108 u32 ga_bmval[2]; /* request */ 120 u32 ga_bmval[3]; /* request */
109 struct svc_fh *ga_fhp; /* response */ 121 struct svc_fh *ga_fhp; /* response */
110}; 122};
111 123
@@ -206,11 +218,9 @@ struct nfsd4_open {
206 stateid_t op_delegate_stateid; /* request - response */ 218 stateid_t op_delegate_stateid; /* request - response */
207 u32 op_create; /* request */ 219 u32 op_create; /* request */
208 u32 op_createmode; /* request */ 220 u32 op_createmode; /* request */
209 u32 op_bmval[2]; /* request */ 221 u32 op_bmval[3]; /* request */
210 union { /* request */ 222 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
211 struct iattr iattr; /* UNCHECKED4,GUARDED4 */ 223 nfs4_verifier verf; /* EXCLUSIVE4 */
212 nfs4_verifier verf; /* EXCLUSIVE4 */
213 } u;
214 clientid_t op_clientid; /* request */ 224 clientid_t op_clientid; /* request */
215 struct xdr_netobj op_owner; /* request */ 225 struct xdr_netobj op_owner; /* request */
216 u32 op_seqid; /* request */ 226 u32 op_seqid; /* request */
@@ -224,8 +234,8 @@ struct nfsd4_open {
224 struct nfs4_stateowner *op_stateowner; /* used during processing */ 234 struct nfs4_stateowner *op_stateowner; /* used during processing */
225 struct nfs4_acl *op_acl; 235 struct nfs4_acl *op_acl;
226}; 236};
227#define op_iattr u.iattr 237#define op_iattr iattr
228#define op_verf u.verf 238#define op_verf verf
229 239
230struct nfsd4_open_confirm { 240struct nfsd4_open_confirm {
231 stateid_t oc_req_stateid /* request */; 241 stateid_t oc_req_stateid /* request */;
@@ -259,7 +269,7 @@ struct nfsd4_readdir {
259 nfs4_verifier rd_verf; /* request */ 269 nfs4_verifier rd_verf; /* request */
260 u32 rd_dircount; /* request */ 270 u32 rd_dircount; /* request */
261 u32 rd_maxcount; /* request */ 271 u32 rd_maxcount; /* request */
262 u32 rd_bmval[2]; /* request */ 272 u32 rd_bmval[3]; /* request */
263 struct svc_rqst *rd_rqstp; /* response */ 273 struct svc_rqst *rd_rqstp; /* response */
264 struct svc_fh * rd_fhp; /* response */ 274 struct svc_fh * rd_fhp; /* response */
265 275
@@ -301,7 +311,7 @@ struct nfsd4_secinfo {
301 311
302struct nfsd4_setattr { 312struct nfsd4_setattr {
303 stateid_t sa_stateid; /* request */ 313 stateid_t sa_stateid; /* request */
304 u32 sa_bmval[2]; /* request */ 314 u32 sa_bmval[3]; /* request */
305 struct iattr sa_iattr; /* request */ 315 struct iattr sa_iattr; /* request */
306 struct nfs4_acl *sa_acl; 316 struct nfs4_acl *sa_acl;
307}; 317};
@@ -327,7 +337,7 @@ struct nfsd4_setclientid_confirm {
327 337
328/* also used for NVERIFY */ 338/* also used for NVERIFY */
329struct nfsd4_verify { 339struct nfsd4_verify {
330 u32 ve_bmval[2]; /* request */ 340 u32 ve_bmval[3]; /* request */
331 u32 ve_attrlen; /* request */ 341 u32 ve_attrlen; /* request */
332 char * ve_attrval; /* request */ 342 char * ve_attrval; /* request */
333}; 343};
@@ -344,6 +354,54 @@ struct nfsd4_write {
344 nfs4_verifier wr_verifier; /* response */ 354 nfs4_verifier wr_verifier; /* response */
345}; 355};
346 356
357struct nfsd4_exchange_id {
358 nfs4_verifier verifier;
359 struct xdr_netobj clname;
360 u32 flags;
361 clientid_t clientid;
362 u32 seqid;
363 int spa_how;
364};
365
366struct nfsd4_channel_attrs {
367 u32 headerpadsz;
368 u32 maxreq_sz;
369 u32 maxresp_sz;
370 u32 maxresp_cached;
371 u32 maxops;
372 u32 maxreqs;
373 u32 nr_rdma_attrs;
374 u32 rdma_attrs;
375};
376
377struct nfsd4_create_session {
378 clientid_t clientid;
379 struct nfs4_sessionid sessionid;
380 u32 seqid;
381 u32 flags;
382 struct nfsd4_channel_attrs fore_channel;
383 struct nfsd4_channel_attrs back_channel;
384 u32 callback_prog;
385 u32 uid;
386 u32 gid;
387};
388
389struct nfsd4_sequence {
390 struct nfs4_sessionid sessionid; /* request/response */
391 u32 seqid; /* request/response */
392 u32 slotid; /* request/response */
393 u32 maxslots; /* request/response */
394 u32 cachethis; /* request */
395#if 0
396 u32 target_maxslots; /* response */
397 u32 status_flags; /* response */
398#endif /* not yet */
399};
400
401struct nfsd4_destroy_session {
402 struct nfs4_sessionid sessionid;
403};
404
347struct nfsd4_op { 405struct nfsd4_op {
348 int opnum; 406 int opnum;
349 __be32 status; 407 __be32 status;
@@ -378,6 +436,12 @@ struct nfsd4_op {
378 struct nfsd4_verify verify; 436 struct nfsd4_verify verify;
379 struct nfsd4_write write; 437 struct nfsd4_write write;
380 struct nfsd4_release_lockowner release_lockowner; 438 struct nfsd4_release_lockowner release_lockowner;
439
440 /* NFSv4.1 */
441 struct nfsd4_exchange_id exchange_id;
442 struct nfsd4_create_session create_session;
443 struct nfsd4_destroy_session destroy_session;
444 struct nfsd4_sequence sequence;
381 } u; 445 } u;
382 struct nfs4_replay * replay; 446 struct nfs4_replay * replay;
383}; 447};
@@ -416,9 +480,22 @@ struct nfsd4_compoundres {
416 u32 taglen; 480 u32 taglen;
417 char * tag; 481 char * tag;
418 u32 opcnt; 482 u32 opcnt;
419 __be32 * tagp; /* where to encode tag and opcount */ 483 __be32 * tagp; /* tag, opcount encode location */
484 struct nfsd4_compound_state cstate;
420}; 485};
421 486
487static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
488{
489 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
490 return args->opcnt == 1;
491}
492
493static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
494{
495 return !resp->cstate.slot->sl_cache_entry.ce_cachethis ||
496 nfsd4_is_solo_sequence(resp);
497}
498
422#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) 499#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
423 500
424static inline void 501static inline void
@@ -448,7 +525,23 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
448extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, 525extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
449 struct nfsd4_compound_state *, 526 struct nfsd4_compound_state *,
450 struct nfsd4_setclientid_confirm *setclientid_confirm); 527 struct nfsd4_setclientid_confirm *setclientid_confirm);
451extern __be32 nfsd4_process_open1(struct nfsd4_open *open); 528extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
529extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
530 struct nfsd4_sequence *seq);
531extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
532 struct nfsd4_compound_state *,
533struct nfsd4_exchange_id *);
534 extern __be32 nfsd4_create_session(struct svc_rqst *,
535 struct nfsd4_compound_state *,
536 struct nfsd4_create_session *);
537extern __be32 nfsd4_sequence(struct svc_rqst *,
538 struct nfsd4_compound_state *,
539 struct nfsd4_sequence *);
540extern __be32 nfsd4_destroy_session(struct svc_rqst *,
541 struct nfsd4_compound_state *,
542 struct nfsd4_destroy_session *);
543extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
544 struct nfsd4_open *open);
452extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 545extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
453 struct svc_fh *current_fh, struct nfsd4_open *open); 546 struct svc_fh *current_fh, struct nfsd4_open *open);
454extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, 547extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d3a4c0231933..2a30775959e9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -24,6 +24,15 @@
24 */ 24 */
25typedef int (*svc_thread_fn)(void *); 25typedef int (*svc_thread_fn)(void *);
26 26
27/* statistics for svc_pool structures */
28struct svc_pool_stats {
29 unsigned long packets;
30 unsigned long sockets_queued;
31 unsigned long threads_woken;
32 unsigned long overloads_avoided;
33 unsigned long threads_timedout;
34};
35
27/* 36/*
28 * 37 *
29 * RPC service thread pool. 38 * RPC service thread pool.
@@ -41,6 +50,8 @@ struct svc_pool {
41 struct list_head sp_sockets; /* pending sockets */ 50 struct list_head sp_sockets; /* pending sockets */
42 unsigned int sp_nrthreads; /* # of threads in pool */ 51 unsigned int sp_nrthreads; /* # of threads in pool */
43 struct list_head sp_all_threads; /* all server threads */ 52 struct list_head sp_all_threads; /* all server threads */
53 int sp_nwaking; /* number of threads woken but not yet active */
54 struct svc_pool_stats sp_stats; /* statistics on pool operation */
44} ____cacheline_aligned_in_smp; 55} ____cacheline_aligned_in_smp;
45 56
46/* 57/*
@@ -83,6 +94,8 @@ struct svc_serv {
83 struct module * sv_module; /* optional module to count when 94 struct module * sv_module; /* optional module to count when
84 * adding threads */ 95 * adding threads */
85 svc_thread_fn sv_function; /* main function for threads */ 96 svc_thread_fn sv_function; /* main function for threads */
97 unsigned int sv_drc_max_pages; /* Total pages for DRC */
98 unsigned int sv_drc_pages_used;/* DRC pages used */
86}; 99};
87 100
88/* 101/*
@@ -218,6 +231,7 @@ struct svc_rqst {
218 struct svc_cred rq_cred; /* auth info */ 231 struct svc_cred rq_cred; /* auth info */
219 void * rq_xprt_ctxt; /* transport specific context ptr */ 232 void * rq_xprt_ctxt; /* transport specific context ptr */
220 struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ 233 struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
234 int rq_usedeferral; /* use deferral */
221 235
222 size_t rq_xprt_hlen; /* xprt header len */ 236 size_t rq_xprt_hlen; /* xprt header len */
223 struct xdr_buf rq_arg; 237 struct xdr_buf rq_arg;
@@ -263,6 +277,7 @@ struct svc_rqst {
263 * cache pages */ 277 * cache pages */
264 wait_queue_head_t rq_wait; /* synchronization */ 278 wait_queue_head_t rq_wait; /* synchronization */
265 struct task_struct *rq_task; /* service thread */ 279 struct task_struct *rq_task; /* service thread */
280 int rq_waking; /* 1 if thread is being woken */
266}; 281};
267 282
268/* 283/*
@@ -393,6 +408,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
393 void (*shutdown)(struct svc_serv *), 408 void (*shutdown)(struct svc_serv *),
394 svc_thread_fn, struct module *); 409 svc_thread_fn, struct module *);
395int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 410int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
411int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
396void svc_destroy(struct svc_serv *); 412void svc_destroy(struct svc_serv *);
397int svc_process(struct svc_rqst *); 413int svc_process(struct svc_rqst *);
398int svc_register(const struct svc_serv *, const int, 414int svc_register(const struct svc_serv *, const int,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 49e1eb454465..d8910b68e1bd 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -69,27 +69,27 @@ struct xdr_buf {
69 * pre-xdr'ed macros. 69 * pre-xdr'ed macros.
70 */ 70 */
71 71
72#define xdr_zero __constant_htonl(0) 72#define xdr_zero cpu_to_be32(0)
73#define xdr_one __constant_htonl(1) 73#define xdr_one cpu_to_be32(1)
74#define xdr_two __constant_htonl(2) 74#define xdr_two cpu_to_be32(2)
75 75
76#define rpc_success __constant_htonl(RPC_SUCCESS) 76#define rpc_success cpu_to_be32(RPC_SUCCESS)
77#define rpc_prog_unavail __constant_htonl(RPC_PROG_UNAVAIL) 77#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL)
78#define rpc_prog_mismatch __constant_htonl(RPC_PROG_MISMATCH) 78#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH)
79#define rpc_proc_unavail __constant_htonl(RPC_PROC_UNAVAIL) 79#define rpc_proc_unavail cpu_to_be32(RPC_PROC_UNAVAIL)
80#define rpc_garbage_args __constant_htonl(RPC_GARBAGE_ARGS) 80#define rpc_garbage_args cpu_to_be32(RPC_GARBAGE_ARGS)
81#define rpc_system_err __constant_htonl(RPC_SYSTEM_ERR) 81#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR)
82#define rpc_drop_reply __constant_htonl(RPC_DROP_REPLY) 82#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY)
83 83
84#define rpc_auth_ok __constant_htonl(RPC_AUTH_OK) 84#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK)
85#define rpc_autherr_badcred __constant_htonl(RPC_AUTH_BADCRED) 85#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED)
86#define rpc_autherr_rejectedcred __constant_htonl(RPC_AUTH_REJECTEDCRED) 86#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
87#define rpc_autherr_badverf __constant_htonl(RPC_AUTH_BADVERF) 87#define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF)
88#define rpc_autherr_rejectedverf __constant_htonl(RPC_AUTH_REJECTEDVERF) 88#define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF)
89#define rpc_autherr_tooweak __constant_htonl(RPC_AUTH_TOOWEAK) 89#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK)
90#define rpcsec_gsserr_credproblem __constant_htonl(RPCSEC_GSS_CREDPROBLEM) 90#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
91#define rpcsec_gsserr_ctxproblem __constant_htonl(RPCSEC_GSS_CTXPROBLEM) 91#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
92#define rpc_autherr_oldseqnum __constant_htonl(101) 92#define rpc_autherr_oldseqnum cpu_to_be32(101)
93 93
94/* 94/*
95 * Miscellaneous XDR helper functions 95 * Miscellaneous XDR helper functions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9b49a6ab8ded..8847add6ca16 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1008,6 +1008,8 @@ svc_process(struct svc_rqst *rqstp)
1008 rqstp->rq_res.tail[0].iov_len = 0; 1008 rqstp->rq_res.tail[0].iov_len = 0;
1009 /* Will be turned off only in gss privacy case: */ 1009 /* Will be turned off only in gss privacy case: */
1010 rqstp->rq_splice_ok = 1; 1010 rqstp->rq_splice_ok = 1;
1011 /* Will be turned off only when NFSv4 Sessions are used */
1012 rqstp->rq_usedeferral = 1;
1011 1013
1012 /* Setup reply header */ 1014 /* Setup reply header */
1013 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1015 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1078,7 +1080,6 @@ svc_process(struct svc_rqst *rqstp)
1078 procp = versp->vs_proc + proc; 1080 procp = versp->vs_proc + proc;
1079 if (proc >= versp->vs_nproc || !procp->pc_func) 1081 if (proc >= versp->vs_nproc || !procp->pc_func)
1080 goto err_bad_proc; 1082 goto err_bad_proc;
1081 rqstp->rq_server = serv;
1082 rqstp->rq_procinfo = procp; 1083 rqstp->rq_procinfo = procp;
1083 1084
1084 /* Syntactic check complete */ 1085 /* Syntactic check complete */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2819ee093f36..c200d92e57e4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -14,6 +14,8 @@
14 14
15#define RPCDBG_FACILITY RPCDBG_SVCXPRT 15#define RPCDBG_FACILITY RPCDBG_SVCXPRT
16 16
17#define SVC_MAX_WAKING 5
18
17static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); 19static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
18static int svc_deferred_recv(struct svc_rqst *rqstp); 20static int svc_deferred_recv(struct svc_rqst *rqstp);
19static struct cache_deferred_req *svc_defer(struct cache_req *req); 21static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -301,6 +303,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
301 struct svc_pool *pool; 303 struct svc_pool *pool;
302 struct svc_rqst *rqstp; 304 struct svc_rqst *rqstp;
303 int cpu; 305 int cpu;
306 int thread_avail;
304 307
305 if (!(xprt->xpt_flags & 308 if (!(xprt->xpt_flags &
306 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) 309 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -312,18 +315,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
312 315
313 spin_lock_bh(&pool->sp_lock); 316 spin_lock_bh(&pool->sp_lock);
314 317
315 if (!list_empty(&pool->sp_threads) &&
316 !list_empty(&pool->sp_sockets))
317 printk(KERN_ERR
318 "svc_xprt_enqueue: "
319 "threads and transports both waiting??\n");
320
321 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { 318 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
322 /* Don't enqueue dead transports */ 319 /* Don't enqueue dead transports */
323 dprintk("svc: transport %p is dead, not enqueued\n", xprt); 320 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
324 goto out_unlock; 321 goto out_unlock;
325 } 322 }
326 323
324 pool->sp_stats.packets++;
325
327 /* Mark transport as busy. It will remain in this state until 326 /* Mark transport as busy. It will remain in this state until
328 * the provider calls svc_xprt_received. We update XPT_BUSY 327 * the provider calls svc_xprt_received. We update XPT_BUSY
329 * atomically because it also guards against trying to enqueue 328 * atomically because it also guards against trying to enqueue
@@ -356,7 +355,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
356 } 355 }
357 356
358 process: 357 process:
359 if (!list_empty(&pool->sp_threads)) { 358 /* Work out whether threads are available */
359 thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */
360 if (pool->sp_nwaking >= SVC_MAX_WAKING) {
361 /* too many threads are runnable and trying to wake up */
362 thread_avail = 0;
363 pool->sp_stats.overloads_avoided++;
364 }
365
366 if (thread_avail) {
360 rqstp = list_entry(pool->sp_threads.next, 367 rqstp = list_entry(pool->sp_threads.next,
361 struct svc_rqst, 368 struct svc_rqst,
362 rq_list); 369 rq_list);
@@ -371,11 +378,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
371 svc_xprt_get(xprt); 378 svc_xprt_get(xprt);
372 rqstp->rq_reserved = serv->sv_max_mesg; 379 rqstp->rq_reserved = serv->sv_max_mesg;
373 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 380 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
381 rqstp->rq_waking = 1;
382 pool->sp_nwaking++;
383 pool->sp_stats.threads_woken++;
374 BUG_ON(xprt->xpt_pool != pool); 384 BUG_ON(xprt->xpt_pool != pool);
375 wake_up(&rqstp->rq_wait); 385 wake_up(&rqstp->rq_wait);
376 } else { 386 } else {
377 dprintk("svc: transport %p put into queue\n", xprt); 387 dprintk("svc: transport %p put into queue\n", xprt);
378 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 388 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
389 pool->sp_stats.sockets_queued++;
379 BUG_ON(xprt->xpt_pool != pool); 390 BUG_ON(xprt->xpt_pool != pool);
380 } 391 }
381 392
@@ -588,6 +599,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
588 int pages; 599 int pages;
589 struct xdr_buf *arg; 600 struct xdr_buf *arg;
590 DECLARE_WAITQUEUE(wait, current); 601 DECLARE_WAITQUEUE(wait, current);
602 long time_left;
591 603
592 dprintk("svc: server %p waiting for data (to = %ld)\n", 604 dprintk("svc: server %p waiting for data (to = %ld)\n",
593 rqstp, timeout); 605 rqstp, timeout);
@@ -636,6 +648,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
636 return -EINTR; 648 return -EINTR;
637 649
638 spin_lock_bh(&pool->sp_lock); 650 spin_lock_bh(&pool->sp_lock);
651 if (rqstp->rq_waking) {
652 rqstp->rq_waking = 0;
653 pool->sp_nwaking--;
654 BUG_ON(pool->sp_nwaking < 0);
655 }
639 xprt = svc_xprt_dequeue(pool); 656 xprt = svc_xprt_dequeue(pool);
640 if (xprt) { 657 if (xprt) {
641 rqstp->rq_xprt = xprt; 658 rqstp->rq_xprt = xprt;
@@ -668,12 +685,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
668 add_wait_queue(&rqstp->rq_wait, &wait); 685 add_wait_queue(&rqstp->rq_wait, &wait);
669 spin_unlock_bh(&pool->sp_lock); 686 spin_unlock_bh(&pool->sp_lock);
670 687
671 schedule_timeout(timeout); 688 time_left = schedule_timeout(timeout);
672 689
673 try_to_freeze(); 690 try_to_freeze();
674 691
675 spin_lock_bh(&pool->sp_lock); 692 spin_lock_bh(&pool->sp_lock);
676 remove_wait_queue(&rqstp->rq_wait, &wait); 693 remove_wait_queue(&rqstp->rq_wait, &wait);
694 if (!time_left)
695 pool->sp_stats.threads_timedout++;
677 696
678 xprt = rqstp->rq_xprt; 697 xprt = rqstp->rq_xprt;
679 if (!xprt) { 698 if (!xprt) {
@@ -958,7 +977,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
958 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); 977 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
959 struct svc_deferred_req *dr; 978 struct svc_deferred_req *dr;
960 979
961 if (rqstp->rq_arg.page_len) 980 if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
962 return NULL; /* if more than a page, give up FIXME */ 981 return NULL; /* if more than a page, give up FIXME */
963 if (rqstp->rq_deferred) { 982 if (rqstp->rq_deferred) {
964 dr = rqstp->rq_deferred; 983 dr = rqstp->rq_deferred;
@@ -1112,3 +1131,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
1112 return totlen; 1131 return totlen;
1113} 1132}
1114EXPORT_SYMBOL_GPL(svc_xprt_names); 1133EXPORT_SYMBOL_GPL(svc_xprt_names);
1134
1135
1136/*----------------------------------------------------------------------------*/
1137
1138static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
1139{
1140 unsigned int pidx = (unsigned int)*pos;
1141 struct svc_serv *serv = m->private;
1142
1143 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
1144
1145 lock_kernel();
1146 /* bump up the pseudo refcount while traversing */
1147 svc_get(serv);
1148 unlock_kernel();
1149
1150 if (!pidx)
1151 return SEQ_START_TOKEN;
1152 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
1153}
1154
1155static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
1156{
1157 struct svc_pool *pool = p;
1158 struct svc_serv *serv = m->private;
1159
1160 dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
1161
1162 if (p == SEQ_START_TOKEN) {
1163 pool = &serv->sv_pools[0];
1164 } else {
1165 unsigned int pidx = (pool - &serv->sv_pools[0]);
1166 if (pidx < serv->sv_nrpools-1)
1167 pool = &serv->sv_pools[pidx+1];
1168 else
1169 pool = NULL;
1170 }
1171 ++*pos;
1172 return pool;
1173}
1174
1175static void svc_pool_stats_stop(struct seq_file *m, void *p)
1176{
1177 struct svc_serv *serv = m->private;
1178
1179 lock_kernel();
1180 /* this function really, really should have been called svc_put() */
1181 svc_destroy(serv);
1182 unlock_kernel();
1183}
1184
1185static int svc_pool_stats_show(struct seq_file *m, void *p)
1186{
1187 struct svc_pool *pool = p;
1188
1189 if (p == SEQ_START_TOKEN) {
1190 seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
1191 return 0;
1192 }
1193
1194 seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
1195 pool->sp_id,
1196 pool->sp_stats.packets,
1197 pool->sp_stats.sockets_queued,
1198 pool->sp_stats.threads_woken,
1199 pool->sp_stats.overloads_avoided,
1200 pool->sp_stats.threads_timedout);
1201
1202 return 0;
1203}
1204
1205static const struct seq_operations svc_pool_stats_seq_ops = {
1206 .start = svc_pool_stats_start,
1207 .next = svc_pool_stats_next,
1208 .stop = svc_pool_stats_stop,
1209 .show = svc_pool_stats_show,
1210};
1211
1212int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
1213{
1214 int err;
1215
1216 err = seq_open(file, &svc_pool_stats_seq_ops);
1217 if (!err)
1218 ((struct seq_file *) file->private_data)->private = serv;
1219 return err;
1220}
1221EXPORT_SYMBOL(svc_pool_stats_open);
1222
1223/*----------------------------------------------------------------------------*/
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9d504234af4a..af3198814c15 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -345,7 +345,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
345 lock_sock(sock->sk); 345 lock_sock(sock->sk);
346 sock->sk->sk_sndbuf = snd * 2; 346 sock->sk->sk_sndbuf = snd * 2;
347 sock->sk->sk_rcvbuf = rcv * 2; 347 sock->sk->sk_rcvbuf = rcv * 2;
348 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
349 release_sock(sock->sk); 348 release_sock(sock->sk);
350#endif 349#endif
351} 350}
@@ -797,23 +796,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
797 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 796 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
798 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 797 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
799 798
800 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
801 /* sndbuf needs to have room for one request
802 * per thread, otherwise we can stall even when the
803 * network isn't a bottleneck.
804 *
805 * We count all threads rather than threads in a
806 * particular pool, which provides an upper bound
807 * on the number of threads which will access the socket.
808 *
809 * rcvbuf just needs to be able to hold a few requests.
810 * Normally they will be removed from the queue
811 * as soon a a complete request arrives.
812 */
813 svc_sock_setbufsize(svsk->sk_sock,
814 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
815 3 * serv->sv_max_mesg);
816
817 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 799 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
818 800
819 /* Receive data. If we haven't got the record length yet, get 801 /* Receive data. If we haven't got the record length yet, get
@@ -1061,15 +1043,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1061 1043
1062 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1044 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1063 1045
1064 /* initialise setting must have enough space to
1065 * receive and respond to one request.
1066 * svc_tcp_recvfrom will re-adjust if necessary
1067 */
1068 svc_sock_setbufsize(svsk->sk_sock,
1069 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1070 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1071
1072 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1073 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1046 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1074 if (sk->sk_state != TCP_ESTABLISHED) 1047 if (sk->sk_state != TCP_ESTABLISHED)
1075 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1048 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1139,8 +1112,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1139 /* Initialize the socket */ 1112 /* Initialize the socket */
1140 if (sock->type == SOCK_DGRAM) 1113 if (sock->type == SOCK_DGRAM)
1141 svc_udp_init(svsk, serv); 1114 svc_udp_init(svsk, serv);
1142 else 1115 else {
1116 /* initialise setting must have enough space to
1117 * receive and respond to one request.
1118 */
1119 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1120 4 * serv->sv_max_mesg);
1143 svc_tcp_init(svsk, serv); 1121 svc_tcp_init(svsk, serv);
1122 }
1144 1123
1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1124 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1146 svsk, svsk->sk_sk); 1125 svsk, svsk->sk_sk);