aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfs
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Makefile15
-rw-r--r--fs/nfs/callback.c187
-rw-r--r--fs/nfs/callback.h70
-rw-r--r--fs/nfs/callback_proc.c85
-rw-r--r--fs/nfs/callback_xdr.c481
-rw-r--r--fs/nfs/delegation.c342
-rw-r--r--fs/nfs/delegation.h57
-rw-r--r--fs/nfs/dir.c1562
-rw-r--r--fs/nfs/direct.c808
-rw-r--r--fs/nfs/file.c484
-rw-r--r--fs/nfs/idmap.c498
-rw-r--r--fs/nfs/inode.c2003
-rw-r--r--fs/nfs/mount_clnt.c183
-rw-r--r--fs/nfs/nfs2xdr.c711
-rw-r--r--fs/nfs/nfs3proc.c859
-rw-r--r--fs/nfs/nfs3xdr.c1023
-rw-r--r--fs/nfs/nfs4proc.c2786
-rw-r--r--fs/nfs/nfs4renewd.c148
-rw-r--r--fs/nfs/nfs4state.c932
-rw-r--r--fs/nfs/nfs4xdr.c4034
-rw-r--r--fs/nfs/nfsroot.c513
-rw-r--r--fs/nfs/pagelist.c309
-rw-r--r--fs/nfs/proc.c655
-rw-r--r--fs/nfs/read.c618
-rw-r--r--fs/nfs/symlink.c117
-rw-r--r--fs/nfs/unlink.c227
-rw-r--r--fs/nfs/write.c1431
27 files changed, 21138 insertions, 0 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
new file mode 100644
index 000000000000..b4baa031edf4
--- /dev/null
+++ b/fs/nfs/Makefile
@@ -0,0 +1,15 @@
1#
2# Makefile for the Linux nfs filesystem routines.
3#
4
5obj-$(CONFIG_NFS_FS) += nfs.o
6
7nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \
8 proc.o read.o symlink.o unlink.o write.o
9nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
10nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
11nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
12 delegation.o idmap.o \
13 callback.o callback_xdr.o callback_proc.o
14nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
15nfs-objs := $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
new file mode 100644
index 000000000000..560d6175dd58
--- /dev/null
+++ b/fs/nfs/callback.c
@@ -0,0 +1,187 @@
1/*
2 * linux/fs/nfs/callback.c
3 *
4 * Copyright (C) 2004 Trond Myklebust
5 *
6 * NFSv4 callback handling
7 */
8
9#include <linux/config.h>
10#include <linux/completion.h>
11#include <linux/ip.h>
12#include <linux/module.h>
13#include <linux/smp_lock.h>
14#include <linux/sunrpc/svc.h>
15#include <linux/sunrpc/svcsock.h>
16#include <linux/nfs_fs.h>
17#include "callback.h"
18
19#define NFSDBG_FACILITY NFSDBG_CALLBACK
20
/* Bookkeeping for the NFSv4 callback server thread (one static instance). */
21struct nfs_callback_data {
22 unsigned int users; /* bumped by nfs_callback_up(), dropped by nfs_callback_down() */
23 struct svc_serv *serv; /* RPC service created in nfs_callback_up() */
24 pid_t pid; /* pid of the callback thread; reset to 0 when the thread exits */
25 struct completion started; /* completed by the thread once it is running */
26 struct completion stopped; /* completed by the thread just before it exits */
27};
28
29static struct nfs_callback_data nfs_callback_info;
30static DECLARE_MUTEX(nfs_callback_sema);
31static struct svc_program nfs4_callback_program;
32
33unsigned short nfs_callback_tcpport;
34
35/*
36 * This is the callback kernel thread.
37 */
38static void nfs_callback_svc(struct svc_rqst *rqstp)
39{
40 struct svc_serv *serv = rqstp->rq_server;
41 int err;
42
43 __module_get(THIS_MODULE);
44 lock_kernel();
45
46 nfs_callback_info.pid = current->pid;
47 daemonize("nfsv4-svc");
48 /* Process request with signals blocked, but allow SIGKILL. */
49 allow_signal(SIGKILL);
50
51 complete(&nfs_callback_info.started);
52
53 while (nfs_callback_info.users != 0 || !signalled()) {
54 /*
55 * Listen for a request on the socket
56 */
57 err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
58 if (err == -EAGAIN || err == -EINTR)
59 continue;
60 if (err < 0) {
61 printk(KERN_WARNING
62 "%s: terminating on error %d\n",
63 __FUNCTION__, -err);
64 break;
65 }
66 dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
67 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
68 svc_process(serv, rqstp);
69 }
70
71 nfs_callback_info.pid = 0;
72 complete(&nfs_callback_info.stopped);
73 unlock_kernel();
74 module_put_and_exit(0);
75}
76
77/*
78 * Bring up the server process if it is not already up.
79 */
80int nfs_callback_up(void)
81{
82 struct svc_serv *serv;
83 struct svc_sock *svsk;
84 int ret = 0;
85
86 lock_kernel();
87 down(&nfs_callback_sema);
88 if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
89 goto out;
90 init_completion(&nfs_callback_info.started);
91 init_completion(&nfs_callback_info.stopped);
92 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
93 ret = -ENOMEM;
94 if (!serv)
95 goto out_err;
96 /* FIXME: We don't want to register this socket with the portmapper */
97 ret = svc_makesock(serv, IPPROTO_TCP, 0);
98 if (ret < 0)
99 goto out_destroy;
100 if (!list_empty(&serv->sv_permsocks)) {
101 svsk = list_entry(serv->sv_permsocks.next,
102 struct svc_sock, sk_list);
103 nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
104 dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
105 } else
106 BUG();
107 ret = svc_create_thread(nfs_callback_svc, serv);
108 if (ret < 0)
109 goto out_destroy;
110 nfs_callback_info.serv = serv;
111 wait_for_completion(&nfs_callback_info.started);
112out:
113 up(&nfs_callback_sema);
114 unlock_kernel();
115 return ret;
116out_destroy:
117 svc_destroy(serv);
118out_err:
119 nfs_callback_info.users--;
120 goto out;
121}
122
123/*
124 * Kill the server process if it is not already up.
125 */
126int nfs_callback_down(void)
127{
128 int ret = 0;
129
130 lock_kernel();
131 down(&nfs_callback_sema);
132 if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
133 goto out;
134 kill_proc(nfs_callback_info.pid, SIGKILL, 1);
135 wait_for_completion(&nfs_callback_info.stopped);
136out:
137 up(&nfs_callback_sema);
138 unlock_kernel();
139 return ret;
140}
141
142static int nfs_callback_authenticate(struct svc_rqst *rqstp)
143{
144 struct in_addr *addr = &rqstp->rq_addr.sin_addr;
145 struct nfs4_client *clp;
146
147 /* Don't talk to strangers */
148 clp = nfs4_find_client(addr);
149 if (clp == NULL)
150 return SVC_DROP;
151 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
152 nfs4_put_client(clp);
153 switch (rqstp->rq_authop->flavour) {
154 case RPC_AUTH_NULL:
155 if (rqstp->rq_proc != CB_NULL)
156 return SVC_DENIED;
157 break;
158 case RPC_AUTH_UNIX:
159 break;
160 case RPC_AUTH_GSS:
161 /* FIXME: RPCSEC_GSS handling? */
162 default:
163 return SVC_DENIED;
164 }
165 return SVC_OK;
166}
167
168/*
169 * Define NFS4 callback program
170 */
171extern struct svc_version nfs4_callback_version1;
172
173static struct svc_version *nfs4_callback_version[] = {
174 [1] = &nfs4_callback_version1,
175};
176
177static struct svc_stat nfs4_callback_stats;
178
179static struct svc_program nfs4_callback_program = {
180 .pg_prog = NFS4_CALLBACK, /* RPC service number */
181 .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
182 .pg_vers = nfs4_callback_version, /* version table */
183 .pg_name = "NFSv4 callback", /* service name */
184 .pg_class = "nfs", /* authentication class */
185 .pg_stats = &nfs4_callback_stats,
186 .pg_authenticate = nfs_callback_authenticate,
187};
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
new file mode 100644
index 000000000000..a0db2d4f9415
--- /dev/null
+++ b/fs/nfs/callback.h
@@ -0,0 +1,70 @@
1/*
2 * linux/fs/nfs/callback.h
3 *
4 * Copyright (C) 2004 Trond Myklebust
5 *
6 * NFSv4 callback definitions
7 */
8#ifndef __LINUX_FS_NFS_CALLBACK_H
9#define __LINUX_FS_NFS_CALLBACK_H
10
11#define NFS4_CALLBACK 0x40000000
12#define NFS4_CALLBACK_XDRSIZE 2048
13#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
14
15enum nfs4_callback_procnum {
16 CB_NULL = 0,
17 CB_COMPOUND = 1,
18};
19
20enum nfs4_callback_opnum {
21 OP_CB_GETATTR = 3,
22 OP_CB_RECALL = 4,
23 OP_CB_ILLEGAL = 10044,
24};
25
/* Decoded header of an incoming CB_COMPOUND request. */
26struct cb_compound_hdr_arg {
27 int taglen; /* length of the tag in bytes */
28 const char *tag; /* points into the request buffer; NULL when taglen is 0 */
29 unsigned int callback_ident; /* third header word, host byte order */
30 unsigned nops; /* number of operations announced by the sender */
31};
32
/* Header of the CB_COMPOUND reply while it is being built. */
33struct cb_compound_hdr_res {
34 uint32_t *status; /* slot reserved in the reply, filled in after the ops have run */
35 int taglen; /* tag length, copied from the request */
36 const char *tag; /* tag, copied from the request */
37 uint32_t *nops; /* slot reserved in the reply for the operation count */
38};
39
/* Decoded arguments of a CB_GETATTR operation. */
40struct cb_getattrargs {
41 struct sockaddr_in *addr; /* set to &rqstp->rq_addr by the decoder */
42 struct nfs_fh fh; /* file handle the request refers to */
43 uint32_t bitmap[2]; /* requested attribute bitmap words, host byte order */
44};
45
/* Result of a CB_GETATTR operation, filled in by nfs4_callback_getattr(). */
46struct cb_getattrres {
47 uint32_t status; /* 0 or an htonl()'d NFS4ERR_* value */
48 uint32_t bitmap[2]; /* attributes actually returned */
49 uint64_t size; /* from i_size_read() */
50 uint64_t change_attr; /* from NFS_CHANGE_ATTR() */
51 struct timespec ctime; /* copy of inode->i_ctime */
52 struct timespec mtime; /* copy of inode->i_mtime */
53};
54
/* Decoded arguments of a CB_RECALL operation. */
55struct cb_recallargs {
56 struct sockaddr_in *addr; /* set to &rqstp->rq_addr by the decoder */
57 struct nfs_fh fh; /* file handle whose delegation is recalled */
58 nfs4_stateid stateid; /* stateid of the delegation being recalled */
59 uint32_t truncate; /* decoded flag word; not consulted by nfs4_callback_recall() */
60};
61
62extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
63extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
64
65extern int nfs_callback_up(void);
66extern int nfs_callback_down(void);
67
68extern unsigned short nfs_callback_tcpport;
69
70#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
new file mode 100644
index 000000000000..ece27e42b93b
--- /dev/null
+++ b/fs/nfs/callback_proc.c
@@ -0,0 +1,85 @@
1/*
2 * linux/fs/nfs/callback_proc.c
3 *
4 * Copyright (C) 2004 Trond Myklebust
5 *
6 * NFSv4 callback procedures
7 */
8#include <linux/config.h>
9#include <linux/nfs4.h>
10#include <linux/nfs_fs.h>
11#include "callback.h"
12#include "delegation.h"
13
14#define NFSDBG_FACILITY NFSDBG_CALLBACK
15
16unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
17{
18 struct nfs4_client *clp;
19 struct nfs_delegation *delegation;
20 struct nfs_inode *nfsi;
21 struct inode *inode;
22
23 res->bitmap[0] = res->bitmap[1] = 0;
24 res->status = htonl(NFS4ERR_BADHANDLE);
25 clp = nfs4_find_client(&args->addr->sin_addr);
26 if (clp == NULL)
27 goto out;
28 inode = nfs_delegation_find_inode(clp, &args->fh);
29 if (inode == NULL)
30 goto out_putclient;
31 nfsi = NFS_I(inode);
32 down_read(&nfsi->rwsem);
33 delegation = nfsi->delegation;
34 if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
35 goto out_iput;
36 res->size = i_size_read(inode);
37 res->change_attr = NFS_CHANGE_ATTR(inode);
38 res->ctime = inode->i_ctime;
39 res->mtime = inode->i_mtime;
40 res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
41 args->bitmap[0];
42 res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
43 args->bitmap[1];
44 res->status = 0;
45out_iput:
46 up_read(&nfsi->rwsem);
47 iput(inode);
48out_putclient:
49 nfs4_put_client(clp);
50out:
51 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
52 return res->status;
53}
54
55unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
56{
57 struct nfs4_client *clp;
58 struct inode *inode;
59 unsigned res;
60
61 res = htonl(NFS4ERR_BADHANDLE);
62 clp = nfs4_find_client(&args->addr->sin_addr);
63 if (clp == NULL)
64 goto out;
65 inode = nfs_delegation_find_inode(clp, &args->fh);
66 if (inode == NULL)
67 goto out_putclient;
68 /* Set up a helper thread to actually return the delegation */
69 switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
70 case 0:
71 res = 0;
72 break;
73 case -ENOENT:
74 res = htonl(NFS4ERR_BAD_STATEID);
75 break;
76 default:
77 res = htonl(NFS4ERR_RESOURCE);
78 }
79 iput(inode);
80out_putclient:
81 nfs4_put_client(clp);
82out:
83 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
84 return res;
85}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
new file mode 100644
index 000000000000..d271df9df2b2
--- /dev/null
+++ b/fs/nfs/callback_xdr.c
@@ -0,0 +1,481 @@
1/*
2 * linux/fs/nfs/callback_xdr.c
3 *
4 * Copyright (C) 2004 Trond Myklebust
5 *
6 * NFSv4 callback encode/decode procedures
7 */
8#include <linux/config.h>
9#include <linux/kernel.h>
10#include <linux/sunrpc/svc.h>
11#include <linux/nfs4.h>
12#include <linux/nfs_fs.h>
13#include "callback.h"
14
15#define CB_OP_TAGLEN_MAXSZ (512)
16#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
17#define CB_OP_GETATTR_BITMAP_MAXSZ (4)
18#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
19 CB_OP_GETATTR_BITMAP_MAXSZ + \
20 2 + 2 + 3 + 3)
21#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
22
23#define NFSDBG_FACILITY NFSDBG_CALLBACK
24
25typedef unsigned (*callback_process_op_t)(void *, void *);
26typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
27typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
28
29
/* Handlers for one CB_COMPOUND operation; a NULL hook is skipped by process_op(). */
30struct callback_op {
31 callback_process_op_t process_op; /* executes the operation */
32 callback_decode_arg_t decode_args; /* decodes the operation's arguments */
33 callback_encode_res_t encode_res; /* encodes the operation's result */
34 long res_maxsize; /* set from the CB_OP_*_RES_MAXSZ constants */
35};
36
37static struct callback_op callback_ops[];
38
39static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
40{
41 return htonl(NFS4_OK);
42}
43
44static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
45{
46 return xdr_argsize_check(rqstp, p);
47}
48
49static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
50{
51 return xdr_ressize_check(rqstp, p);
52}
53
54static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
55{
56 uint32_t *p;
57
58 p = xdr_inline_decode(xdr, nbytes);
59 if (unlikely(p == NULL))
60 printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
61 return p;
62}
63
64static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
65{
66 uint32_t *p;
67
68 p = read_buf(xdr, 4);
69 if (unlikely(p == NULL))
70 return htonl(NFS4ERR_RESOURCE);
71 *len = ntohl(*p);
72
73 if (*len != 0) {
74 p = read_buf(xdr, *len);
75 if (unlikely(p == NULL))
76 return htonl(NFS4ERR_RESOURCE);
77 *str = (const char *)p;
78 } else
79 *str = NULL;
80
81 return 0;
82}
83
84static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
85{
86 uint32_t *p;
87
88 p = read_buf(xdr, 4);
89 if (unlikely(p == NULL))
90 return htonl(NFS4ERR_RESOURCE);
91 fh->size = ntohl(*p);
92 if (fh->size > NFS4_FHSIZE)
93 return htonl(NFS4ERR_BADHANDLE);
94 p = read_buf(xdr, fh->size);
95 if (unlikely(p == NULL))
96 return htonl(NFS4ERR_RESOURCE);
97 memcpy(&fh->data[0], p, fh->size);
98 memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
99 return 0;
100}
101
102static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
103{
104 uint32_t *p;
105 unsigned int attrlen;
106
107 p = read_buf(xdr, 4);
108 if (unlikely(p == NULL))
109 return htonl(NFS4ERR_RESOURCE);
110 attrlen = ntohl(*p);
111 p = read_buf(xdr, attrlen << 2);
112 if (unlikely(p == NULL))
113 return htonl(NFS4ERR_RESOURCE);
114 if (likely(attrlen > 0))
115 bitmap[0] = ntohl(*p++);
116 if (attrlen > 1)
117 bitmap[1] = ntohl(*p);
118 return 0;
119}
120
121static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
122{
123 uint32_t *p;
124
125 p = read_buf(xdr, 16);
126 if (unlikely(p == NULL))
127 return htonl(NFS4ERR_RESOURCE);
128 memcpy(stateid->data, p, 16);
129 return 0;
130}
131
132static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
133{
134 uint32_t *p;
135 unsigned int minor_version;
136 unsigned status;
137
138 status = decode_string(xdr, &hdr->taglen, &hdr->tag);
139 if (unlikely(status != 0))
140 return status;
141 /* We do not like overly long tags! */
142 if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
143 printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
144 __FUNCTION__, hdr->taglen);
145 return htonl(NFS4ERR_RESOURCE);
146 }
147 p = read_buf(xdr, 12);
148 if (unlikely(p == NULL))
149 return htonl(NFS4ERR_RESOURCE);
150 minor_version = ntohl(*p++);
151 /* Check minor version is zero. */
152 if (minor_version != 0) {
153 printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
154 __FUNCTION__, minor_version);
155 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
156 }
157 hdr->callback_ident = ntohl(*p++);
158 hdr->nops = ntohl(*p);
159 return 0;
160}
161
162static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
163{
164 uint32_t *p;
165 p = read_buf(xdr, 4);
166 if (unlikely(p == NULL))
167 return htonl(NFS4ERR_RESOURCE);
168 *op = ntohl(*p);
169 return 0;
170}
171
172static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
173{
174 unsigned status;
175
176 status = decode_fh(xdr, &args->fh);
177 if (unlikely(status != 0))
178 goto out;
179 args->addr = &rqstp->rq_addr;
180 status = decode_bitmap(xdr, args->bitmap);
181out:
182 dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
183 return status;
184}
185
186static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
187{
188 uint32_t *p;
189 unsigned status;
190
191 args->addr = &rqstp->rq_addr;
192 status = decode_stateid(xdr, &args->stateid);
193 if (unlikely(status != 0))
194 goto out;
195 p = read_buf(xdr, 4);
196 if (unlikely(p == NULL)) {
197 status = htonl(NFS4ERR_RESOURCE);
198 goto out;
199 }
200 args->truncate = ntohl(*p);
201 status = decode_fh(xdr, &args->fh);
202out:
203 dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
204 return 0;
205}
206
207static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
208{
209 uint32_t *p;
210
211 p = xdr_reserve_space(xdr, 4 + len);
212 if (unlikely(p == NULL))
213 return htonl(NFS4ERR_RESOURCE);
214 xdr_encode_opaque(p, str, len);
215 return 0;
216}
217
218#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
219#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
220static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
221{
222 uint32_t bm[2];
223 uint32_t *p;
224
225 bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
226 bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
227 if (bm[1] != 0) {
228 p = xdr_reserve_space(xdr, 16);
229 if (unlikely(p == NULL))
230 return htonl(NFS4ERR_RESOURCE);
231 *p++ = htonl(2);
232 *p++ = bm[0];
233 *p++ = bm[1];
234 } else if (bm[0] != 0) {
235 p = xdr_reserve_space(xdr, 12);
236 if (unlikely(p == NULL))
237 return htonl(NFS4ERR_RESOURCE);
238 *p++ = htonl(1);
239 *p++ = bm[0];
240 } else {
241 p = xdr_reserve_space(xdr, 8);
242 if (unlikely(p == NULL))
243 return htonl(NFS4ERR_RESOURCE);
244 *p++ = htonl(0);
245 }
246 *savep = p;
247 return 0;
248}
249
250static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
251{
252 uint32_t *p;
253
254 if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
255 return 0;
256 p = xdr_reserve_space(xdr, 8);
257 if (unlikely(p == 0))
258 return htonl(NFS4ERR_RESOURCE);
259 p = xdr_encode_hyper(p, change);
260 return 0;
261}
262
263static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
264{
265 uint32_t *p;
266
267 if (!(bitmap[0] & FATTR4_WORD0_SIZE))
268 return 0;
269 p = xdr_reserve_space(xdr, 8);
270 if (unlikely(p == 0))
271 return htonl(NFS4ERR_RESOURCE);
272 p = xdr_encode_hyper(p, size);
273 return 0;
274}
275
276static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
277{
278 uint32_t *p;
279
280 p = xdr_reserve_space(xdr, 12);
281 if (unlikely(p == 0))
282 return htonl(NFS4ERR_RESOURCE);
283 p = xdr_encode_hyper(p, time->tv_sec);
284 *p = htonl(time->tv_nsec);
285 return 0;
286}
287
288static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
289{
290 if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
291 return 0;
292 return encode_attr_time(xdr,time);
293}
294
295static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
296{
297 if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
298 return 0;
299 return encode_attr_time(xdr,time);
300}
301
302static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
303{
304 unsigned status;
305
306 hdr->status = xdr_reserve_space(xdr, 4);
307 if (unlikely(hdr->status == NULL))
308 return htonl(NFS4ERR_RESOURCE);
309 status = encode_string(xdr, hdr->taglen, hdr->tag);
310 if (unlikely(status != 0))
311 return status;
312 hdr->nops = xdr_reserve_space(xdr, 4);
313 if (unlikely(hdr->nops == NULL))
314 return htonl(NFS4ERR_RESOURCE);
315 return 0;
316}
317
318static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
319{
320 uint32_t *p;
321
322 p = xdr_reserve_space(xdr, 8);
323 if (unlikely(p == NULL))
324 return htonl(NFS4ERR_RESOURCE);
325 *p++ = htonl(op);
326 *p = res;
327 return 0;
328}
329
330static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
331{
332 uint32_t *savep;
333 unsigned status = res->status;
334
335 if (unlikely(status != 0))
336 goto out;
337 status = encode_attr_bitmap(xdr, res->bitmap, &savep);
338 if (unlikely(status != 0))
339 goto out;
340 status = encode_attr_change(xdr, res->bitmap, res->change_attr);
341 if (unlikely(status != 0))
342 goto out;
343 status = encode_attr_size(xdr, res->bitmap, res->size);
344 if (unlikely(status != 0))
345 goto out;
346 status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
347 if (unlikely(status != 0))
348 goto out;
349 status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
350 *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
351out:
352 dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
353 return status;
354}
355
356static unsigned process_op(struct svc_rqst *rqstp,
357 struct xdr_stream *xdr_in, void *argp,
358 struct xdr_stream *xdr_out, void *resp)
359{
360 struct callback_op *op;
361 unsigned int op_nr;
362 unsigned int status = 0;
363 long maxlen;
364 unsigned res;
365
366 dprintk("%s: start\n", __FUNCTION__);
367 status = decode_op_hdr(xdr_in, &op_nr);
368 if (unlikely(status != 0)) {
369 op_nr = OP_CB_ILLEGAL;
370 op = &callback_ops[0];
371 } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
372 op_nr = OP_CB_ILLEGAL;
373 op = &callback_ops[0];
374 status = htonl(NFS4ERR_OP_ILLEGAL);
375 } else
376 op = &callback_ops[op_nr];
377
378 maxlen = xdr_out->end - xdr_out->p;
379 if (maxlen > 0 && maxlen < PAGE_SIZE) {
380 if (likely(status == 0 && op->decode_args != NULL))
381 status = op->decode_args(rqstp, xdr_in, argp);
382 if (likely(status == 0 && op->process_op != NULL))
383 status = op->process_op(argp, resp);
384 } else
385 status = htonl(NFS4ERR_RESOURCE);
386
387 res = encode_op_hdr(xdr_out, op_nr, status);
388 if (status == 0)
389 status = res;
390 if (op->encode_res != NULL && status == 0)
391 status = op->encode_res(rqstp, xdr_out, resp);
392 dprintk("%s: done, status = %d\n", __FUNCTION__, status);
393 return status;
394}
395
396/*
397 * Decode, process and encode a COMPOUND
398 */
399static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
400{
401 struct cb_compound_hdr_arg hdr_arg;
402 struct cb_compound_hdr_res hdr_res;
403 struct xdr_stream xdr_in, xdr_out;
404 uint32_t *p;
405 unsigned int status;
406 unsigned int nops = 1;
407
408 dprintk("%s: start\n", __FUNCTION__);
409
410 xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
411
412 p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
413 rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
414 xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
415
416 decode_compound_hdr_arg(&xdr_in, &hdr_arg);
417 hdr_res.taglen = hdr_arg.taglen;
418 hdr_res.tag = hdr_arg.tag;
419 encode_compound_hdr_res(&xdr_out, &hdr_res);
420
421 for (;;) {
422 status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
423 if (status != 0)
424 break;
425 if (nops == hdr_arg.nops)
426 break;
427 nops++;
428 }
429 *hdr_res.status = status;
430 *hdr_res.nops = htonl(nops);
431 dprintk("%s: done, status = %u\n", __FUNCTION__, status);
432 return rpc_success;
433}
434
435/*
436 * Define NFS4 callback COMPOUND ops.
437 */
438static struct callback_op callback_ops[] = {
439 [0] = {
440 .res_maxsize = CB_OP_HDR_RES_MAXSZ,
441 },
442 [OP_CB_GETATTR] = {
443 .process_op = (callback_process_op_t)nfs4_callback_getattr,
444 .decode_args = (callback_decode_arg_t)decode_getattr_args,
445 .encode_res = (callback_encode_res_t)encode_getattr_res,
446 .res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
447 },
448 [OP_CB_RECALL] = {
449 .process_op = (callback_process_op_t)nfs4_callback_recall,
450 .decode_args = (callback_decode_arg_t)decode_recall_args,
451 .res_maxsize = CB_OP_RECALL_RES_MAXSZ,
452 }
453};
454
455/*
456 * Define NFS4 callback procedures
457 */
458static struct svc_procedure nfs4_callback_procedures1[] = {
459 [CB_NULL] = {
460 .pc_func = nfs4_callback_null,
461 .pc_decode = (kxdrproc_t)nfs4_decode_void,
462 .pc_encode = (kxdrproc_t)nfs4_encode_void,
463 .pc_xdrressize = 1,
464 },
465 [CB_COMPOUND] = {
466 .pc_func = nfs4_callback_compound,
467 .pc_encode = (kxdrproc_t)nfs4_encode_void,
468 .pc_argsize = 256,
469 .pc_ressize = 256,
470 .pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
471 }
472};
473
474struct svc_version nfs4_callback_version1 = {
475 .vs_vers = 1,
476 .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
477 .vs_proc = nfs4_callback_procedures1,
478 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
479 .vs_dispatch = NULL,
480};
481
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
new file mode 100644
index 000000000000..5b9c60f97791
--- /dev/null
+++ b/fs/nfs/delegation.c
@@ -0,0 +1,342 @@
1/*
2 * linux/fs/nfs/delegation.c
3 *
4 * Copyright (C) 2004 Trond Myklebust
5 *
6 * NFS file delegation management
7 *
8 */
9#include <linux/config.h>
10#include <linux/completion.h>
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/spinlock.h>
14
15#include <linux/nfs4.h>
16#include <linux/nfs_fs.h>
17#include <linux/nfs_xdr.h>
18
19#include "delegation.h"
20
21static struct nfs_delegation *nfs_alloc_delegation(void)
22{
23 return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
24}
25
26static void nfs_free_delegation(struct nfs_delegation *delegation)
27{
28 if (delegation->cred)
29 put_rpccred(delegation->cred);
30 kfree(delegation);
31}
32
33static void nfs_delegation_claim_opens(struct inode *inode)
34{
35 struct nfs_inode *nfsi = NFS_I(inode);
36 struct nfs_open_context *ctx;
37 struct nfs4_state *state;
38
39again:
40 spin_lock(&inode->i_lock);
41 list_for_each_entry(ctx, &nfsi->open_files, list) {
42 state = ctx->state;
43 if (state == NULL)
44 continue;
45 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
46 continue;
47 get_nfs_open_context(ctx);
48 spin_unlock(&inode->i_lock);
49 if (nfs4_open_delegation_recall(ctx->dentry, state) < 0)
50 return;
51 put_nfs_open_context(ctx);
52 goto again;
53 }
54 spin_unlock(&inode->i_lock);
55}
56
57/*
58 * Set up a delegation on an inode
59 */
60void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
61{
62 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
63
64 if (delegation == NULL)
65 return;
66 memcpy(delegation->stateid.data, res->delegation.data,
67 sizeof(delegation->stateid.data));
68 delegation->type = res->delegation_type;
69 delegation->maxsize = res->maxsize;
70 put_rpccred(cred);
71 delegation->cred = get_rpccred(cred);
72 delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
73 NFS_I(inode)->delegation_state = delegation->type;
74 smp_wmb();
75}
76
77/*
78 * Set up a delegation on an inode
79 */
80int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
81{
82 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
83 struct nfs_inode *nfsi = NFS_I(inode);
84 struct nfs_delegation *delegation;
85 int status = 0;
86
87 delegation = nfs_alloc_delegation();
88 if (delegation == NULL)
89 return -ENOMEM;
90 memcpy(delegation->stateid.data, res->delegation.data,
91 sizeof(delegation->stateid.data));
92 delegation->type = res->delegation_type;
93 delegation->maxsize = res->maxsize;
94 delegation->cred = get_rpccred(cred);
95 delegation->inode = inode;
96
97 spin_lock(&clp->cl_lock);
98 if (nfsi->delegation == NULL) {
99 list_add(&delegation->super_list, &clp->cl_delegations);
100 nfsi->delegation = delegation;
101 nfsi->delegation_state = delegation->type;
102 delegation = NULL;
103 } else {
104 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
105 sizeof(delegation->stateid)) != 0 ||
106 delegation->type != nfsi->delegation->type) {
107 printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
108 __FUNCTION__, NIPQUAD(clp->cl_addr));
109 status = -EIO;
110 }
111 }
112 spin_unlock(&clp->cl_lock);
113 if (delegation != NULL)
114 kfree(delegation);
115 return status;
116}
117
118static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
119{
120 int res = 0;
121
122 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
123
124 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
125 nfs_free_delegation(delegation);
126 return res;
127}
128
129/* Sync all data to disk upon delegation return */
130static void nfs_msync_inode(struct inode *inode)
131{
132 filemap_fdatawrite(inode->i_mapping);
133 nfs_wb_all(inode);
134 filemap_fdatawait(inode->i_mapping);
135}
136
137/*
138 * Basic procedure for returning a delegation to the server
139 */
140int nfs_inode_return_delegation(struct inode *inode)
141{
142 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
143 struct nfs_inode *nfsi = NFS_I(inode);
144 struct nfs_delegation *delegation;
145 int res = 0;
146
147 nfs_msync_inode(inode);
148 down_read(&clp->cl_sem);
149 /* Guard against new delegated open calls */
150 down_write(&nfsi->rwsem);
151 spin_lock(&clp->cl_lock);
152 delegation = nfsi->delegation;
153 if (delegation != NULL) {
154 list_del_init(&delegation->super_list);
155 nfsi->delegation = NULL;
156 nfsi->delegation_state = 0;
157 }
158 spin_unlock(&clp->cl_lock);
159 nfs_delegation_claim_opens(inode);
160 up_write(&nfsi->rwsem);
161 up_read(&clp->cl_sem);
162 nfs_msync_inode(inode);
163
164 if (delegation != NULL)
165 res = nfs_do_return_delegation(inode, delegation);
166 return res;
167}
168
169/*
170 * Return all delegations associated to a super block
171 */
172void nfs_return_all_delegations(struct super_block *sb)
173{
174 struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
175 struct nfs_delegation *delegation;
176 struct inode *inode;
177
178 if (clp == NULL)
179 return;
180restart:
181 spin_lock(&clp->cl_lock);
182 list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
183 if (delegation->inode->i_sb != sb)
184 continue;
185 inode = igrab(delegation->inode);
186 if (inode == NULL)
187 continue;
188 spin_unlock(&clp->cl_lock);
189 nfs_inode_return_delegation(inode);
190 iput(inode);
191 goto restart;
192 }
193 spin_unlock(&clp->cl_lock);
194}
195
196/*
197 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
198 */
199void nfs_handle_cb_pathdown(struct nfs4_client *clp)
200{
201 struct nfs_delegation *delegation;
202 struct inode *inode;
203
204 if (clp == NULL)
205 return;
206restart:
207 spin_lock(&clp->cl_lock);
208 list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
209 inode = igrab(delegation->inode);
210 if (inode == NULL)
211 continue;
212 spin_unlock(&clp->cl_lock);
213 nfs_inode_return_delegation(inode);
214 iput(inode);
215 goto restart;
216 }
217 spin_unlock(&clp->cl_lock);
218}
219
/*
 * Arguments handed from nfs_async_inode_return_delegation() to
 * recall_thread().  The structure lives on the caller's stack, so the
 * thread must not touch it after completing 'started'.
 */
220struct recall_threadargs {
221 struct inode *inode; /* inode whose delegation is being recalled */
222 struct nfs4_client *clp; /* not set by the initializer in nfs_async_inode_return_delegation() */
223 const nfs4_stateid *stateid; /* stateid named by the recall request */
224
225 struct completion started; /* completed once 'result' is valid */
226 int result; /* 0 if the stateid matched the inode's delegation, else -ENOENT */
227};
228
229static int recall_thread(void *data)
230{
231 struct recall_threadargs *args = (struct recall_threadargs *)data;
232 struct inode *inode = igrab(args->inode);
233 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
234 struct nfs_inode *nfsi = NFS_I(inode);
235 struct nfs_delegation *delegation;
236
237 daemonize("nfsv4-delegreturn");
238
239 nfs_msync_inode(inode);
240 down_read(&clp->cl_sem);
241 down_write(&nfsi->rwsem);
242 spin_lock(&clp->cl_lock);
243 delegation = nfsi->delegation;
244 if (delegation != NULL && memcmp(delegation->stateid.data,
245 args->stateid->data,
246 sizeof(delegation->stateid.data)) == 0) {
247 list_del_init(&delegation->super_list);
248 nfsi->delegation = NULL;
249 nfsi->delegation_state = 0;
250 args->result = 0;
251 } else {
252 delegation = NULL;
253 args->result = -ENOENT;
254 }
255 spin_unlock(&clp->cl_lock);
256 complete(&args->started);
257 nfs_delegation_claim_opens(inode);
258 up_write(&nfsi->rwsem);
259 up_read(&clp->cl_sem);
260 nfs_msync_inode(inode);
261
262 if (delegation != NULL)
263 nfs_do_return_delegation(inode, delegation);
264 iput(inode);
265 module_put_and_exit(0);
266}
267
268/*
269 * Asynchronous delegation recall!
270 */
271int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
272{
273 struct recall_threadargs data = {
274 .inode = inode,
275 .stateid = stateid,
276 };
277 int status;
278
279 init_completion(&data.started);
280 __module_get(THIS_MODULE);
281 status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
282 if (status < 0)
283 goto out_module_put;
284 wait_for_completion(&data.started);
285 return data.result;
286out_module_put:
287 module_put(THIS_MODULE);
288 return status;
289}
290
291/*
292 * Retrieve the inode associated with a delegation
293 */
294struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
295{
296 struct nfs_delegation *delegation;
297 struct inode *res = NULL;
298 spin_lock(&clp->cl_lock);
299 list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
300 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
301 res = igrab(delegation->inode);
302 break;
303 }
304 }
305 spin_unlock(&clp->cl_lock);
306 return res;
307}
308
309/*
310 * Mark all delegations as needing to be reclaimed
311 */
312void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
313{
314 struct nfs_delegation *delegation;
315 spin_lock(&clp->cl_lock);
316 list_for_each_entry(delegation, &clp->cl_delegations, super_list)
317 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
318 spin_unlock(&clp->cl_lock);
319}
320
321/*
322 * Reap all unclaimed delegations after reboot recovery is done
323 */
324void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
325{
326 struct nfs_delegation *delegation, *n;
327 LIST_HEAD(head);
328 spin_lock(&clp->cl_lock);
329 list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
330 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
331 continue;
332 list_move(&delegation->super_list, &head);
333 NFS_I(delegation->inode)->delegation = NULL;
334 NFS_I(delegation->inode)->delegation_state = 0;
335 }
336 spin_unlock(&clp->cl_lock);
337 while(!list_empty(&head)) {
338 delegation = list_entry(head.next, struct nfs_delegation, super_list);
339 list_del(&delegation->super_list);
340 nfs_free_delegation(delegation);
341 }
342}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
new file mode 100644
index 000000000000..3f6c45a29d6a
--- /dev/null
+++ b/fs/nfs/delegation.h
@@ -0,0 +1,57 @@
1/*
2 * linux/fs/nfs/delegation.h
3 *
4 * Copyright (c) Trond Myklebust
5 *
6 * Definitions pertaining to NFS delegated files
7 */
8#ifndef FS_NFS_DELEGATION_H
9#define FS_NFS_DELEGATION_H
10
11#if defined(CONFIG_NFS_V4)
12/*
13 * NFSv4 delegation
14 */
/*
 * super_list: entry on the owning nfs4_client's cl_delegations list
 * inode:      inode this delegation record is attached to
 * flags:      NFS_DELEGATION_* bits; only NFS_DELEGATION_NEED_RECLAIM
 *             is defined here
 * cred, stateid, type, maxsize:
 *             presumably the credential, stateid, delegation type and
 *             size limit handed out with the delegation -- TODO confirm
 *             against delegation.c
 */
15struct nfs_delegation {
16 struct list_head super_list;
17 struct rpc_cred *cred;
18 struct inode *inode;
19 nfs4_stateid stateid;
20 int type;
21#define NFS_DELEGATION_NEED_RECLAIM 1
22 long flags;
23 loff_t maxsize;
24};
25
26int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
27void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
28int nfs_inode_return_delegation(struct inode *inode);
29int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
30
31struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
32void nfs_return_all_delegations(struct super_block *sb);
33void nfs_handle_cb_pathdown(struct nfs4_client *clp);
34
35void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
36void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
37
38/* NFSv4 delegation-related procedures */
39int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
40int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
41
42static inline int nfs_have_delegation(struct inode *inode, int flags)
43{
44 flags &= FMODE_READ|FMODE_WRITE;
45 smp_rmb();
46 if ((NFS_I(inode)->delegation_state & flags) == flags)
47 return 1;
48 return 0;
49}
50#else
/*
 * Variant used when CONFIG_NFS_V4 is not defined: always reports that
 * no delegation is held, whatever inode and flags are passed.
 */
51static inline int nfs_have_delegation(struct inode *inode, int flags)
52{
53 return 0;
54}
55#endif
56
57#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
new file mode 100644
index 000000000000..73f96acd5d37
--- /dev/null
+++ b/fs/nfs/dir.c
@@ -0,0 +1,1562 @@
1/*
2 * linux/fs/nfs/dir.c
3 *
4 * Copyright (C) 1992 Rick Sladkey
5 *
6 * nfs directory handling functions
7 *
8 * 10 Apr 1996 Added silly rename for unlink --okir
9 * 28 Sep 1996 Improved directory cache --okir
10 * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de
11 * Re-implemented silly rename for unlink, newly implemented
12 * silly rename for nfs_rename() following the suggestions
13 * of Olaf Kirch (okir) found in this file.
14 * Following Linus comments on my original hack, this version
15 * depends only on the dcache stuff and doesn't touch the inode
16 * layer (iput() and friends).
17 * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
18 */
19
20#include <linux/time.h>
21#include <linux/errno.h>
22#include <linux/stat.h>
23#include <linux/fcntl.h>
24#include <linux/string.h>
25#include <linux/kernel.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/sunrpc/clnt.h>
29#include <linux/nfs_fs.h>
30#include <linux/nfs_mount.h>
31#include <linux/pagemap.h>
32#include <linux/smp_lock.h>
33#include <linux/namei.h>
34
35#include "delegation.h"
36
37#define NFS_PARANOIA 1
38/* #define NFS_DEBUG_VERBOSE 1 */
39
40static int nfs_opendir(struct inode *, struct file *);
41static int nfs_readdir(struct file *, void *, filldir_t);
42static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
43static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
44static int nfs_mkdir(struct inode *, struct dentry *, int);
45static int nfs_rmdir(struct inode *, struct dentry *);
46static int nfs_unlink(struct inode *, struct dentry *);
47static int nfs_symlink(struct inode *, struct dentry *, const char *);
48static int nfs_link(struct dentry *, struct inode *, struct dentry *);
49static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
50static int nfs_rename(struct inode *, struct dentry *,
51 struct inode *, struct dentry *);
52static int nfs_fsync_dir(struct file *, struct dentry *, int);
53
/*
 * file_operations table for directories: readdir, open and fsync are
 * handled by the nfs_readdir(), nfs_opendir() and nfs_fsync_dir()
 * routines of this file; read goes to generic_read_dir and release to
 * nfs_release.
 */
54struct file_operations nfs_dir_operations = {
55 .read = generic_read_dir,
56 .readdir = nfs_readdir,
57 .open = nfs_opendir,
58 .release = nfs_release,
59 .fsync = nfs_fsync_dir,
60};
61
/*
 * inode_operations table for directories.  Name lookups go through
 * nfs_lookup(); the create/link/unlink/symlink/mkdir/rmdir/mknod/rename
 * entries are the static routines declared at the top of this file.
 */
62struct inode_operations nfs_dir_inode_operations = {
63 .create = nfs_create,
64 .lookup = nfs_lookup,
65 .link = nfs_link,
66 .unlink = nfs_unlink,
67 .symlink = nfs_symlink,
68 .mkdir = nfs_mkdir,
69 .rmdir = nfs_rmdir,
70 .mknod = nfs_mknod,
71 .rename = nfs_rename,
72 .permission = nfs_permission,
73 .getattr = nfs_getattr,
74 .setattr = nfs_setattr,
75};
76
77#ifdef CONFIG_NFS_V4
78
79static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
/*
 * inode_operations table for directories when CONFIG_NFS_V4 is set.
 * The entries match nfs_dir_inode_operations except for .lookup, which
 * is routed to nfs_atomic_lookup().
 */
80struct inode_operations nfs4_dir_inode_operations = {
81 .create = nfs_create,
82 .lookup = nfs_atomic_lookup,
83 .link = nfs_link,
84 .unlink = nfs_unlink,
85 .symlink = nfs_symlink,
86 .mkdir = nfs_mkdir,
87 .rmdir = nfs_rmdir,
88 .mknod = nfs_mknod,
89 .rename = nfs_rename,
90 .permission = nfs_permission,
91 .getattr = nfs_getattr,
92 .setattr = nfs_setattr,
93};
94
95#endif /* CONFIG_NFS_V4 */
96
/*
 * Open a directory.
 *
 * Runs the generic nfs_open() under the big kernel lock and returns its
 * result unchanged.
 */
static int
nfs_opendir(struct inode *inode, struct file *filp)
{
	int status;

	lock_kernel();
	/* Call generic open code in order to cache credentials */
	status = nfs_open(inode, filp);
	unlock_kernel();

	return status;
}
112
/*
 * decode_dirent_t: decodes one directory entry starting at the given
 * u32 position into an nfs_entry.  The int argument is the READDIRPLUS
 * flag (callers pass desc->plus).  The return value is the new position
 * or an ERR_PTR-encoded error (see dir_decode()).
 */
113typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
/*
 * State carried through one nfs_readdir() call:
 *   file       - the open directory
 *   page       - page currently mapped for decoding, NULL when none
 *   page_index - index of the directory page being searched
 *   ptr        - decode position inside the kmap()ed page
 *   target     - cookie being searched for (starts out as filp->f_pos)
 *   entry      - most recently decoded directory entry
 *   decode     - protocol specific entry decoder
 *   plus       - non-zero while READDIRPLUS is in use
 *   error      - error from the readdir call, reported by nfs_readdir()
 */
114typedef struct {
115 struct file *file;
116 struct page *page;
117 unsigned long page_index;
118 u32 *ptr;
119 u64 target;
120 struct nfs_entry *entry;
121 decode_dirent_t decode;
122 int plus;
123 int error;
124} nfs_readdir_descriptor_t;
125
126/* Now we cache directories properly, by stuffing the dirent
127 * data directly in the page cache.
128 *
129 * Inode invalidation due to refresh etc. takes care of
130 * _everything_, no sloppy entry flushing logic, no extraneous
131 * copying, network direct to page cache, the way it was meant
132 * to be.
133 *
134 * NOTE: Dirent information verification is done always by the
135 * page-in of the RPC reply, nowhere else, this simplifies
136 * things substantially.
 *
 * nfs_readdir_filler() is handed to read_cache_page() by
 * find_dirent_page().  It asks the server for the entries following
 * desc->entry->cookie and stores the reply in 'page'.
 *
 * Returns 0 with the page marked uptodate and unlocked.  On failure the
 * page is marked in error and unlocked, the inode caches are zapped,
 * the readdir error is saved in desc->error and -EIO is returned.
137 */
138static
139int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
140{
141 struct file *file = desc->file;
142 struct inode *inode = file->f_dentry->d_inode;
143 struct rpc_cred *cred = nfs_file_cred(file);
144 unsigned long timestamp;
145 int error;
146
147 dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
148
149 again:
 /* Sampled before the call so that it predates the reply */
150 timestamp = jiffies;
151 error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
152 NFS_SERVER(inode)->dtsize, desc->plus);
153 if (error < 0) {
154 /* We requested READDIRPLUS, but the server doesn't grok it */
155 if (error == -ENOTSUPP && desc->plus) {
 /* Turn READDIRPLUS off for the server and this inode, then retry */
156 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
157 NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
158 desc->plus = 0;
159 goto again;
160 }
161 goto error;
162 }
163 SetPageUptodate(page);
164 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
165 /* Ensure consistent page alignment of the data.
166 * Note: assumes we have exclusive access to this mapping either
167 * through inode->i_sem or some other mechanism.
168 */
169 if (page->index == 0) {
170 invalidate_inode_pages(inode->i_mapping);
171 NFS_I(inode)->readdir_timestamp = timestamp;
172 }
173 unlock_page(page);
174 return 0;
175 error:
176 SetPageError(page);
177 unlock_page(page);
178 nfs_zap_caches(inode);
 /* The real error is kept in desc->error; the return value is always -EIO */
179 desc->error = error;
180 return -EIO;
181}
182
183static inline
184int dir_decode(nfs_readdir_descriptor_t *desc)
185{
186 u32 *p = desc->ptr;
187 p = desc->decode(p, desc->entry, desc->plus);
188 if (IS_ERR(p))
189 return PTR_ERR(p);
190 desc->ptr = p;
191 return 0;
192}
193
194static inline
195void dir_page_release(nfs_readdir_descriptor_t *desc)
196{
197 kunmap(desc->page);
198 page_cache_release(desc->page);
199 desc->page = NULL;
200 desc->ptr = NULL;
201}
202
203/*
204 * Given a pointer to a buffer that has already been filled by a call
205 * to readdir, find the next entry.
206 *
207 * If the end of the buffer has been reached, return -EAGAIN, if not,
208 * return the offset within the buffer of the next entry to be
209 * read.
210 */
211static inline
212int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
213{
214 struct nfs_entry *entry = desc->entry;
215 int loop_count = 0,
216 status;
217
218 while((status = dir_decode(desc)) == 0) {
219 dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
220 if (entry->prev_cookie == desc->target)
221 break;
222 if (loop_count++ > 200) {
223 loop_count = 0;
224 schedule();
225 }
226 }
227 dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
228 return status;
229}
230
231/*
232 * Find the given page, and call find_dirent() in order to try to
233 * return the next entry.
234 */
235static inline
236int find_dirent_page(nfs_readdir_descriptor_t *desc)
237{
238 struct inode *inode = desc->file->f_dentry->d_inode;
239 struct page *page;
240 int status;
241
242 dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
243
244 page = read_cache_page(inode->i_mapping, desc->page_index,
245 (filler_t *)nfs_readdir_filler, desc);
246 if (IS_ERR(page)) {
247 status = PTR_ERR(page);
248 goto out;
249 }
250 if (!PageUptodate(page))
251 goto read_error;
252
253 /* NOTE: Someone else may have changed the READDIRPLUS flag */
254 desc->page = page;
255 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
256 status = find_dirent(desc, page);
257 if (status < 0)
258 dir_page_release(desc);
259 out:
260 dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
261 return status;
262 read_error:
263 page_cache_release(page);
264 return -EIO;
265}
266
267/*
268 * Recurse through the page cache pages, and return a
269 * filled nfs_entry structure of the next directory entry if possible.
270 *
271 * The target for the search is 'desc->target'.
272 */
273static inline
274int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
275{
276 int loop_count = 0;
277 int res;
278
279 dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
280 for (;;) {
281 res = find_dirent_page(desc);
282 if (res != -EAGAIN)
283 break;
284 /* Align to beginning of next page */
285 desc->page_index ++;
286 if (loop_count++ > 200) {
287 loop_count = 0;
288 schedule();
289 }
290 }
291 dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
292 return res;
293}
294
295static inline unsigned int dt_type(struct inode *inode)
296{
297 return (inode->i_mode >> 12) & 15;
298}
299
300static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
301
302/*
303 * Once we've found the start of the dirent within a page: fill 'er up...
 *
 * Passes the entry currently held in desc->entry, and then each further
 * entry decoded from the mapped page, to 'filldir'.  After every entry
 * that filldir accepts, file->f_pos and desc->target are moved on to
 * that entry's cookie.
 *
 * The loop stops when filldir returns a negative value or when the next
 * entry cannot be decoded (desc->page_index is then advanced).  The page
 * is always released before returning.  The return value is the last
 * result from filldir.
304 */
305static
306int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
307 filldir_t filldir)
308{
309 struct file *file = desc->file;
310 struct nfs_entry *entry = desc->entry;
311 struct dentry *dentry = NULL;
312 unsigned long fileid;
313 int loop_count = 0,
314 res;
315
316 dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
317
318 for(;;) {
319 unsigned d_type = DT_UNKNOWN;
320 /* Note: entry->prev_cookie contains the cookie for
321 * retrieving the current dirent on the server */
322 fileid = nfs_fileid_to_ino_t(entry->ino);
323
324 /* Get a dentry if we have one */
 /* Drop the reference taken for the previous entry first */
325 if (dentry != NULL)
326 dput(dentry);
327 dentry = nfs_readdir_lookup(desc);
328
329 /* Use readdirplus info */
330 if (dentry != NULL && dentry->d_inode != NULL) {
331 d_type = dt_type(dentry->d_inode);
332 fileid = dentry->d_inode->i_ino;
333 }
334
335 res = filldir(dirent, entry->name, entry->len,
336 entry->prev_cookie, fileid, d_type);
337 if (res < 0)
338 break;
339 file->f_pos = desc->target = entry->cookie;
340 if (dir_decode(desc) != 0) {
 /* No more entries to decode from this page: continue with the next one */
341 desc->page_index ++;
342 break;
343 }
344 if (loop_count++ > 200) {
345 loop_count = 0;
346 schedule();
347 }
348 }
349 dir_page_release(desc);
350 if (dentry != NULL)
351 dput(dentry);
352 dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
353 return res;
354}
355
356/*
357 * If we cannot find a cookie in our cache, we suspect that this is
358 * because it points to a deleted file, so we ask the server to return
359 * whatever it thinks is the next entry. We then feed this to filldir.
360 * If all goes well, we should then be able to find our way round the
361 * cache on the next call to readdir_search_pagecache();
362 *
363 * NOTE: we cannot add the anonymous page to the pagecache because
364 * the data it contains might not be page aligned. Besides,
365 * we should already have a complete representation of the
366 * directory in the page cache by the time we get here.
 *
 * Returns -ENOMEM if no page can be allocated, -EIO if the readdir call
 * fails (its error code is left in desc->error), the decode error if
 * the first entry cannot be decoded, and otherwise the result of
 * nfs_do_filldir().
367 */
368static inline
369int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
370 filldir_t filldir)
371{
372 struct file *file = desc->file;
373 struct inode *inode = file->f_dentry->d_inode;
374 struct rpc_cred *cred = nfs_file_cred(file);
375 struct page *page = NULL;
376 int status;
377
378 dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
379
380 page = alloc_page(GFP_HIGHUSER);
381 if (!page) {
382 status = -ENOMEM;
383 goto out;
384 }
 /* Ask the server directly for the entries following desc->target */
385 desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
386 page,
387 NFS_SERVER(inode)->dtsize,
388 desc->plus);
389 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
 /* The page is attached and mapped even on error, so out_release can undo both */
390 desc->page = page;
391 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
392 if (desc->error >= 0) {
393 if ((status = dir_decode(desc)) == 0)
394 desc->entry->prev_cookie = desc->target;
395 } else
396 status = -EIO;
397 if (status < 0)
398 goto out_release;
399
 /* nfs_do_filldir() releases the page itself */
400 status = nfs_do_filldir(desc, dirent, filldir);
401
402 /* Reset read descriptor so it searches the page cache from
403 * the start upon the next call to readdir_search_pagecache() */
404 desc->page_index = 0;
405 desc->entry->cookie = desc->entry->prev_cookie = 0;
406 desc->entry->eof = 0;
407 out:
408 dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
409 return status;
410 out_release:
411 dir_page_release(desc);
412 goto out;
413}
414
415/* The file offset position is now represented as a true offset into the
416 * page cache as is the case in most of the other filesystems.
 *
 * readdir entry point.  Revalidates the directory inode, then loops:
 * locate the entry for cookie filp->f_pos in the cached pages with
 * readdir_search_pagecache() and hand entries to 'filldir' through
 * nfs_do_filldir(), until the entry's eof flag is set, filldir refuses
 * an entry, or an error occurs.  The whole call runs under the big
 * kernel lock.
 *
 * Returns the revalidation error if that step fails, otherwise
 * desc->error if a readdir call recorded one, otherwise a negative
 * 'res' left by the loop, otherwise 0.
417 */
418static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
419{
420 struct dentry *dentry = filp->f_dentry;
421 struct inode *inode = dentry->d_inode;
422 nfs_readdir_descriptor_t my_desc,
423 *desc = &my_desc;
424 struct nfs_entry my_entry;
425 struct nfs_fh fh;
426 struct nfs_fattr fattr;
427 long res;
428
429 lock_kernel();
430
431 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
432 if (res < 0) {
433 unlock_kernel();
434 return res;
435 }
436
437 /*
438 * filp->f_pos points to the file offset in the page cache.
439 * but if the cache has meanwhile been zapped, we need to
440 * read from the last dirent to revalidate f_pos
441 * itself.
442 */
443 memset(desc, 0, sizeof(*desc));
444
445 desc->file = filp;
446 desc->target = filp->f_pos;
447 desc->decode = NFS_PROTO(inode)->decode_dirent;
448 desc->plus = NFS_USE_READDIRPLUS(inode);
449
 /* The entry, filehandle and attribute buffers all live on this stack frame */
450 my_entry.cookie = my_entry.prev_cookie = 0;
451 my_entry.eof = 0;
452 my_entry.fh = &fh;
453 my_entry.fattr = &fattr;
454 desc->entry = &my_entry;
455
456 while(!desc->entry->eof) {
457 res = readdir_search_pagecache(desc);
458 if (res == -EBADCOOKIE) {
459 /* This means either end of directory */
460 if (desc->entry->cookie != desc->target) {
461 /* Or that the server has 'lost' a cookie */
462 res = uncached_readdir(desc, dirent, filldir);
463 if (res >= 0)
464 continue;
465 }
466 res = 0;
467 break;
468 }
469 if (res == -ETOOSMALL && desc->plus) {
 /* Give up on READDIRPLUS for this inode and redo the search without it */
470 NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
471 nfs_zap_caches(inode);
472 desc->plus = 0;
473 desc->entry->eof = 0;
474 continue;
475 }
476 if (res < 0)
477 break;
478
479 res = nfs_do_filldir(desc, dirent, filldir);
480 if (res < 0) {
 /* A negative result from filldir ends the loop but is not reported as an error */
481 res = 0;
482 break;
483 }
484 }
485 unlock_kernel();
486 if (desc->error < 0)
487 return desc->error;
488 if (res < 0)
489 return res;
490 return 0;
491}
492
493/*
494 * All directory operations under NFS are synchronous, so fsync()
495 * is a dummy operation.
 *
 * None of the arguments are used; the function always returns 0.
496 */
497int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
498{
499 return 0;
500}
501
502/*
503 * A check for whether or not the parent directory has changed.
504 * In the case it has, we assume that the dentries are untrustworthy
505 * and may need to be looked up again.
506 */
507static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
508{
509 if (IS_ROOT(dentry))
510 return 1;
511 if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0
512 || nfs_attribute_timeout(dir))
513 return 0;
514 return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
515}
516
517static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
518{
519 dentry->d_fsdata = (void *)verf;
520}
521
522/*
523 * Whenever an NFS operation succeeds, we know that the dentry
524 * is valid, so we update the revalidation timestamp.
525 */
526static inline void nfs_renew_times(struct dentry * dentry)
527{
528 dentry->d_time = jiffies;
529}
530
531static inline
532int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
533{
534 struct nfs_server *server = NFS_SERVER(inode);
535
536 if (nd != NULL) {
537 int ndflags = nd->flags;
538 /* VFS wants an on-the-wire revalidation */
539 if (ndflags & LOOKUP_REVAL)
540 goto out_force;
541 /* This is an open(2) */
542 if ((ndflags & LOOKUP_OPEN) &&
543 !(ndflags & LOOKUP_CONTINUE) &&
544 !(server->flags & NFS_MOUNT_NOCTO))
545 goto out_force;
546 }
547 return nfs_revalidate_inode(server, inode);
548out_force:
549 return __nfs_revalidate_inode(server, inode);
550}
551
552/*
553 * We judge how long we want to trust negative
554 * dentries by looking at the parent inode mtime.
555 *
556 * If parent mtime has changed, we revalidate, else we wait for a
557 * period corresponding to the parent's attribute cache timeout value.
558 */
559static inline
560int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
561 struct nameidata *nd)
562{
563 int ndflags = 0;
564
565 if (nd)
566 ndflags = nd->flags;
567 /* Don't revalidate a negative dentry if we're creating a new file */
568 if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE))
569 return 0;
570 return !nfs_check_verifier(dir, dentry);
571}
572
573/*
574 * This is called every time the dcache has a lookup hit,
575 * and we should check whether we can really trust that
576 * lookup.
577 *
578 * NOTE! The hit can be a negative hit too, don't assume
579 * we have an inode!
580 *
581 * If the parent directory is seen to have changed, we throw out the
582 * cached dentry and do a new lookup.
 *
 * Returns 1 if the dentry may be used, 0 if it was dropped.  A
 * directory dentry that fails validation but has submounts is kept and
 * reported as valid.
583 */
584static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
585{
586 struct inode *dir;
587 struct inode *inode;
588 struct dentry *parent;
589 int error;
590 struct nfs_fh fhandle;
591 struct nfs_fattr fattr;
592 unsigned long verifier;
593
 /* Reference on the parent is dropped with dput() on every exit path */
594 parent = dget_parent(dentry);
595 lock_kernel();
596 dir = parent->d_inode;
597 inode = dentry->d_inode;
598
 /* Negative dentry: only the directory verifier can be checked */
599 if (!inode) {
600 if (nfs_neg_need_reval(dir, dentry, nd))
601 goto out_bad;
602 goto out_valid;
603 }
604
605 if (is_bad_inode(inode)) {
606 dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
607 dentry->d_parent->d_name.name, dentry->d_name.name);
608 goto out_bad;
609 }
610
611 /* Revalidate parent directory attribute cache */
612 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
613 goto out_zap_parent;
614
615 /* Force a full look up iff the parent directory has changed */
616 if (nfs_check_verifier(dir, dentry)) {
 /* Directory verifier still matches: revalidating the inode is enough */
617 if (nfs_lookup_verify_inode(inode, nd))
618 goto out_zap_parent;
619 goto out_valid;
620 }
621
622 if (NFS_STALE(inode))
623 goto out_bad;
624
 /* Verifier is sampled before the lookup call is issued */
625 verifier = nfs_save_change_attribute(dir);
626 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
627 if (error)
628 goto out_bad;
 /* The name now refers to a different filehandle */
629 if (nfs_compare_fh(NFS_FH(inode), &fhandle))
630 goto out_bad;
631 if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
632 goto out_bad;
633
634 nfs_renew_times(dentry);
635 nfs_set_verifier(dentry, verifier);
636 out_valid:
637 unlock_kernel();
638 dput(parent);
639 return 1;
640out_zap_parent:
641 nfs_zap_caches(dir);
642 out_bad:
643 NFS_CACHEINV(dir);
644 if (inode && S_ISDIR(inode->i_mode)) {
645 /* Purge readdir caches. */
646 nfs_zap_caches(inode);
647 /* If we have submounts, don't unhash ! */
648 if (have_submounts(dentry))
649 goto out_valid;
650 shrink_dcache_parent(dentry);
651 }
652 d_drop(dentry);
653 unlock_kernel();
654 dput(parent);
655 return 0;
656}
657
658/*
659 * This is called from dput() when d_count is going to 0.
660 */
661static int nfs_dentry_delete(struct dentry *dentry)
662{
663 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
664 dentry->d_parent->d_name.name, dentry->d_name.name,
665 dentry->d_flags);
666
667 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
668 /* Unhash it, so that ->d_iput() would be called */
669 return 1;
670 }
671 if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
672 /* Unhash it, so that ancestors of killed async unlink
673 * files will be cleaned up during umount */
674 return 1;
675 }
676 return 0;
677
678}
679
680/*
681 * Called when the dentry loses inode.
682 * We use it to clean up silly-renamed files.
683 */
684static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
685{
686 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
687 lock_kernel();
688 inode->i_nlink--;
689 nfs_complete_unlink(dentry);
690 unlock_kernel();
691 }
692 /* When creating a negative dentry, we want to renew d_time */
693 nfs_renew_times(dentry);
694 iput(inode);
695}
696
/*
 * dentry_operations table wiring d_revalidate to
 * nfs_lookup_revalidate() and d_delete/d_iput to nfs_dentry_delete()
 * and nfs_dentry_iput().
 */
697struct dentry_operations nfs_dentry_operations = {
698 .d_revalidate = nfs_lookup_revalidate,
699 .d_delete = nfs_dentry_delete,
700 .d_iput = nfs_dentry_iput,
701};
702
703static inline
704int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
705{
706 if (NFS_PROTO(dir)->version == 2)
707 return 0;
708 if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
709 return 0;
710 return (nd->intent.open.flags & O_EXCL) != 0;
711}
712
713static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
714{
715 struct dentry *res;
716 struct inode *inode = NULL;
717 int error;
718 struct nfs_fh fhandle;
719 struct nfs_fattr fattr;
720
721 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
722 dentry->d_parent->d_name.name, dentry->d_name.name);
723
724 res = ERR_PTR(-ENAMETOOLONG);
725 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
726 goto out;
727
728 res = ERR_PTR(-ENOMEM);
729 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
730
731 lock_kernel();
732 /* Revalidate parent directory attribute cache */
733 error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
734 if (error < 0) {
735 res = ERR_PTR(error);
736 goto out_unlock;
737 }
738
739 /* If we're doing an exclusive create, optimize away the lookup */
740 if (nfs_is_exclusive_create(dir, nd))
741 goto no_entry;
742
743 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
744 if (error == -ENOENT)
745 goto no_entry;
746 if (error < 0) {
747 res = ERR_PTR(error);
748 goto out_unlock;
749 }
750 res = ERR_PTR(-EACCES);
751 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
752 if (!inode)
753 goto out_unlock;
754no_entry:
755 res = d_add_unique(dentry, inode);
756 if (res != NULL)
757 dentry = res;
758 nfs_renew_times(dentry);
759 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
760out_unlock:
761 unlock_kernel();
762out:
763 return res;
764}
765
766#ifdef CONFIG_NFS_V4
767static int nfs_open_revalidate(struct dentry *, struct nameidata *);
768
/*
 * dentry_operations table built when CONFIG_NFS_V4 is set:
 * d_revalidate goes to nfs_open_revalidate(), while d_delete and d_iput
 * use nfs_dentry_delete() and nfs_dentry_iput().
 */
769struct dentry_operations nfs4_dentry_operations = {
770 .d_revalidate = nfs_open_revalidate,
771 .d_delete = nfs_dentry_delete,
772 .d_iput = nfs_dentry_iput,
773};
774
775static int is_atomic_open(struct inode *dir, struct nameidata *nd)
776{
777 if (!nd)
778 return 0;
779 /* Check that we are indeed trying to open this file */
780 if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN))
781 return 0;
782 /* NFS does not (yet) have a stateful open for directories */
783 if (nd->flags & LOOKUP_DIRECTORY)
784 return 0;
785 /* Are we trying to write to a read only partition? */
786 if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
787 return 0;
788 return 1;
789}
790
791static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
792{
793 struct dentry *res = NULL;
794 struct inode *inode = NULL;
795 int error;
796
797 /* Check that we are indeed trying to open this file */
798 if (!is_atomic_open(dir, nd))
799 goto no_open;
800
801 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
802 res = ERR_PTR(-ENAMETOOLONG);
803 goto out;
804 }
805 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
806
807 /* Let vfs_create() deal with O_EXCL */
808 if (nd->intent.open.flags & O_EXCL)
809 goto no_entry;
810
811 /* Open the file on the server */
812 lock_kernel();
813 /* Revalidate parent directory attribute cache */
814 error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
815 if (error < 0) {
816 res = ERR_PTR(error);
817 goto out;
818 }
819
820 if (nd->intent.open.flags & O_CREAT) {
821 nfs_begin_data_update(dir);
822 inode = nfs4_atomic_open(dir, dentry, nd);
823 nfs_end_data_update(dir);
824 } else
825 inode = nfs4_atomic_open(dir, dentry, nd);
826 unlock_kernel();
827 if (IS_ERR(inode)) {
828 error = PTR_ERR(inode);
829 switch (error) {
830 /* Make a negative dentry */
831 case -ENOENT:
832 inode = NULL;
833 break;
834 /* This turned out not to be a regular file */
835 case -ELOOP:
836 if (!(nd->intent.open.flags & O_NOFOLLOW))
837 goto no_open;
838 /* case -EISDIR: */
839 /* case -EINVAL: */
840 default:
841 res = ERR_PTR(error);
842 goto out;
843 }
844 }
845no_entry:
846 res = d_add_unique(dentry, inode);
847 if (res != NULL)
848 dentry = res;
849 nfs_renew_times(dentry);
850 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
851out:
852 return res;
853no_open:
854 return nfs_lookup(dir, dentry, nd);
855}
856
/*
 * d_revalidate handler from nfs4_dentry_operations.
 *
 * When the lookup is not an atomic open (see is_atomic_open()), the
 * inode is not a regular file, or an exclusive create is requested, the
 * dentry is accepted outright if a read delegation is held and is
 * otherwise checked by nfs_lookup_revalidate().
 *
 * For an atomic open on a negative dentry, the dentry is dropped and 0
 * is returned.  For a regular file, nfs4_open_revalidate() is called
 * with O_CREAT and O_TRUNC masked out; its result is returned, and a
 * zero result also drops the dentry.
 */
857static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
858{
859 struct dentry *parent = NULL;
860 struct inode *inode = dentry->d_inode;
861 struct inode *dir;
862 unsigned long verifier;
863 int openflags, ret = 0;
864
865 parent = dget_parent(dentry);
866 dir = parent->d_inode;
867 if (!is_atomic_open(dir, nd))
868 goto no_open;
869 /* We can't create new files in nfs_open_revalidate(), so we
870 * optimize away revalidation of negative dentries.
871 */
 /* ret is still 0 here, so the 'out' path drops the dentry */
872 if (inode == NULL)
873 goto out;
874 /* NFS only supports OPEN on regular files */
875 if (!S_ISREG(inode->i_mode))
876 goto no_open;
877 openflags = nd->intent.open.flags;
878 /* We cannot do exclusive creation on a positive dentry */
879 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
880 goto no_open;
881 /* We can't create new files, or truncate existing ones here */
882 openflags &= ~(O_CREAT|O_TRUNC);
883
884 /*
885 * Note: we're not holding inode->i_sem and so may be racing with
886 * operations that change the directory. We therefore save the
887 * change attribute *before* we do the RPC call.
888 */
889 lock_kernel();
890 verifier = nfs_save_change_attribute(dir);
891 ret = nfs4_open_revalidate(dir, dentry, openflags);
 /* NOTE(review): verifier is stored only when ret == 0, the same case in which the dentry is dropped below -- confirm intent */
892 if (!ret)
893 nfs_set_verifier(dentry, verifier);
894 unlock_kernel();
895out:
896 dput(parent);
897 if (!ret)
898 d_drop(dentry);
899 return ret;
900no_open:
901 dput(parent);
 /* A read delegation lets us trust the dentry without further checks */
902 if (inode != NULL && nfs_have_delegation(inode, FMODE_READ))
903 return 1;
904 return nfs_lookup_revalidate(dentry, nd);
905}
906#endif /* CONFIG_NFS_V4 */
907
908static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
909{
910 struct dentry *parent = desc->file->f_dentry;
911 struct inode *dir = parent->d_inode;
912 struct nfs_entry *entry = desc->entry;
913 struct dentry *dentry, *alias;
914 struct qstr name = {
915 .name = entry->name,
916 .len = entry->len,
917 };
918 struct inode *inode;
919
920 switch (name.len) {
921 case 2:
922 if (name.name[0] == '.' && name.name[1] == '.')
923 return dget_parent(parent);
924 break;
925 case 1:
926 if (name.name[0] == '.')
927 return dget(parent);
928 }
929 name.hash = full_name_hash(name.name, name.len);
930 dentry = d_lookup(parent, &name);
931 if (dentry != NULL)
932 return dentry;
933 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
934 return NULL;
935 /* Note: caller is already holding the dir->i_sem! */
936 dentry = d_alloc(parent, &name);
937 if (dentry == NULL)
938 return NULL;
939 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
940 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
941 if (!inode) {
942 dput(dentry);
943 return NULL;
944 }
945 alias = d_add_unique(dentry, inode);
946 if (alias != NULL) {
947 dput(dentry);
948 dentry = alias;
949 }
950 nfs_renew_times(dentry);
951 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
952 return dentry;
953}
954
955/*
956 * Code common to create, mkdir, and mknod.
957 */
958int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
959 struct nfs_fattr *fattr)
960{
961 struct inode *inode;
962 int error = -EACCES;
963
964 /* We may have been initialized further down */
965 if (dentry->d_inode)
966 return 0;
967 if (fhandle->size == 0) {
968 struct inode *dir = dentry->d_parent->d_inode;
969 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
970 if (error)
971 goto out_err;
972 }
973 if (!(fattr->valid & NFS_ATTR_FATTR)) {
974 struct nfs_server *server = NFS_SB(dentry->d_sb);
975 error = server->rpc_ops->getattr(server, fhandle, fattr);
976 if (error < 0)
977 goto out_err;
978 }
979 error = -ENOMEM;
980 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
981 if (inode == NULL)
982 goto out_err;
983 d_instantiate(dentry, inode);
984 return 0;
985out_err:
986 d_drop(dentry);
987 return error;
988}
989
990/*
991 * Following a failed create operation, we drop the dentry rather
992 * than retain a negative dentry. This avoids a problem in the event
993 * that the operation succeeded on the server, but an error in the
994 * reply path made it appear to have failed.
995 */
996static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
997 struct nameidata *nd)
998{
999 struct iattr attr;
1000 int error;
1001 int open_flags = 0;
1002
1003 dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
1004 dir->i_ino, dentry->d_name.name);
1005
1006 attr.ia_mode = mode;
1007 attr.ia_valid = ATTR_MODE;
1008
1009 if (nd && (nd->flags & LOOKUP_CREATE))
1010 open_flags = nd->intent.open.flags;
1011
1012 lock_kernel();
1013 nfs_begin_data_update(dir);
1014 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
1015 nfs_end_data_update(dir);
1016 if (error != 0)
1017 goto out_err;
1018 nfs_renew_times(dentry);
1019 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1020 unlock_kernel();
1021 return 0;
1022out_err:
1023 unlock_kernel();
1024 d_drop(dentry);
1025 return error;
1026}
1027
1028/*
1029 * See comments for nfs_proc_create regarding failed operations.
1030 */
1031static int
1032nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1033{
1034 struct iattr attr;
1035 int status;
1036
1037 dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
1038 dir->i_ino, dentry->d_name.name);
1039
1040 if (!new_valid_dev(rdev))
1041 return -EINVAL;
1042
1043 attr.ia_mode = mode;
1044 attr.ia_valid = ATTR_MODE;
1045
1046 lock_kernel();
1047 nfs_begin_data_update(dir);
1048 status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
1049 nfs_end_data_update(dir);
1050 if (status != 0)
1051 goto out_err;
1052 nfs_renew_times(dentry);
1053 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1054 unlock_kernel();
1055 return 0;
1056out_err:
1057 unlock_kernel();
1058 d_drop(dentry);
1059 return status;
1060}
1061
1062/*
1063 * See comments for nfs_proc_create regarding failed operations.
1064 */
1065static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1066{
1067 struct iattr attr;
1068 int error;
1069
1070 dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
1071 dir->i_ino, dentry->d_name.name);
1072
1073 attr.ia_valid = ATTR_MODE;
1074 attr.ia_mode = mode | S_IFDIR;
1075
1076 lock_kernel();
1077 nfs_begin_data_update(dir);
1078 error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
1079 nfs_end_data_update(dir);
1080 if (error != 0)
1081 goto out_err;
1082 nfs_renew_times(dentry);
1083 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1084 unlock_kernel();
1085 return 0;
1086out_err:
1087 d_drop(dentry);
1088 unlock_kernel();
1089 return error;
1090}
1091
1092static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1093{
1094 int error;
1095
1096 dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
1097 dir->i_ino, dentry->d_name.name);
1098
1099 lock_kernel();
1100 nfs_begin_data_update(dir);
1101 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1102 /* Ensure the VFS deletes this inode */
1103 if (error == 0 && dentry->d_inode != NULL)
1104 dentry->d_inode->i_nlink = 0;
1105 nfs_end_data_update(dir);
1106 unlock_kernel();
1107
1108 return error;
1109}
1110
1111static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1112{
1113 static unsigned int sillycounter;
1114 const int i_inosize = sizeof(dir->i_ino)*2;
1115 const int countersize = sizeof(sillycounter)*2;
1116 const int slen = sizeof(".nfs") + i_inosize + countersize - 1;
1117 char silly[slen+1];
1118 struct qstr qsilly;
1119 struct dentry *sdentry;
1120 int error = -EIO;
1121
1122 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1123 dentry->d_parent->d_name.name, dentry->d_name.name,
1124 atomic_read(&dentry->d_count));
1125
1126#ifdef NFS_PARANOIA
1127if (!dentry->d_inode)
1128printk("NFS: silly-renaming %s/%s, negative dentry??\n",
1129dentry->d_parent->d_name.name, dentry->d_name.name);
1130#endif
1131 /*
1132 * We don't allow a dentry to be silly-renamed twice.
1133 */
1134 error = -EBUSY;
1135 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1136 goto out;
1137
1138 sprintf(silly, ".nfs%*.*lx",
1139 i_inosize, i_inosize, dentry->d_inode->i_ino);
1140
1141 sdentry = NULL;
1142 do {
1143 char *suffix = silly + slen - countersize;
1144
1145 dput(sdentry);
1146 sillycounter++;
1147 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1148
1149 dfprintk(VFS, "trying to rename %s to %s\n",
1150 dentry->d_name.name, silly);
1151
1152 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1153 /*
1154 * N.B. Better to return EBUSY here ... it could be
1155 * dangerous to delete the file while it's in use.
1156 */
1157 if (IS_ERR(sdentry))
1158 goto out;
1159 } while(sdentry->d_inode != NULL); /* need negative lookup */
1160
1161 qsilly.name = silly;
1162 qsilly.len = strlen(silly);
1163 nfs_begin_data_update(dir);
1164 if (dentry->d_inode) {
1165 nfs_begin_data_update(dentry->d_inode);
1166 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1167 dir, &qsilly);
1168 nfs_end_data_update(dentry->d_inode);
1169 } else
1170 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1171 dir, &qsilly);
1172 nfs_end_data_update(dir);
1173 if (!error) {
1174 nfs_renew_times(dentry);
1175 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1176 d_move(dentry, sdentry);
1177 error = nfs_async_unlink(dentry);
1178 /* If we return 0 we don't unlink */
1179 }
1180 dput(sdentry);
1181out:
1182 return error;
1183}
1184
1185/*
1186 * Remove a file after making sure there are no pending writes,
1187 * and after checking that the file has only one user.
1188 *
1189 * We invalidate the attribute cache and free the inode prior to the operation
1190 * to avoid possible races if the server reuses the inode.
1191 */
1192static int nfs_safe_remove(struct dentry *dentry)
1193{
1194 struct inode *dir = dentry->d_parent->d_inode;
1195 struct inode *inode = dentry->d_inode;
1196 int error = -EBUSY;
1197
1198 dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
1199 dentry->d_parent->d_name.name, dentry->d_name.name);
1200
1201 /* If the dentry was sillyrenamed, we simply call d_delete() */
1202 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1203 error = 0;
1204 goto out;
1205 }
1206
1207 nfs_begin_data_update(dir);
1208 if (inode != NULL) {
1209 nfs_begin_data_update(inode);
1210 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
1211 /* The VFS may want to delete this inode */
1212 if (error == 0)
1213 inode->i_nlink--;
1214 nfs_end_data_update(inode);
1215 } else
1216 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
1217 nfs_end_data_update(dir);
1218out:
1219 return error;
1220}
1221
1222/* We do silly rename. In case sillyrename() returns -EBUSY, the inode
1223 * belongs to an active ".nfs..." file and we return -EBUSY.
1224 *
1225 * If sillyrename() returns 0, we do nothing, otherwise we unlink.
1226 */
1227static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1228{
1229 int error;
1230 int need_rehash = 0;
1231
1232 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
1233 dir->i_ino, dentry->d_name.name);
1234
1235 lock_kernel();
1236 spin_lock(&dcache_lock);
1237 spin_lock(&dentry->d_lock);
1238 if (atomic_read(&dentry->d_count) > 1) {
1239 spin_unlock(&dentry->d_lock);
1240 spin_unlock(&dcache_lock);
1241 error = nfs_sillyrename(dir, dentry);
1242 unlock_kernel();
1243 return error;
1244 }
1245 if (!d_unhashed(dentry)) {
1246 __d_drop(dentry);
1247 need_rehash = 1;
1248 }
1249 spin_unlock(&dentry->d_lock);
1250 spin_unlock(&dcache_lock);
1251 error = nfs_safe_remove(dentry);
1252 if (!error) {
1253 nfs_renew_times(dentry);
1254 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1255 } else if (need_rehash)
1256 d_rehash(dentry);
1257 unlock_kernel();
1258 return error;
1259}
1260
1261static int
1262nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1263{
1264 struct iattr attr;
1265 struct nfs_fattr sym_attr;
1266 struct nfs_fh sym_fh;
1267 struct qstr qsymname;
1268 int error;
1269
1270 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
1271 dir->i_ino, dentry->d_name.name, symname);
1272
1273#ifdef NFS_PARANOIA
1274if (dentry->d_inode)
1275printk("nfs_proc_symlink: %s/%s not negative!\n",
1276dentry->d_parent->d_name.name, dentry->d_name.name);
1277#endif
1278 /*
1279 * Fill in the sattr for the call.
1280 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
1281 */
1282 attr.ia_valid = ATTR_MODE;
1283 attr.ia_mode = S_IFLNK | S_IRWXUGO;
1284
1285 qsymname.name = symname;
1286 qsymname.len = strlen(symname);
1287
1288 lock_kernel();
1289 nfs_begin_data_update(dir);
1290 error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
1291 &attr, &sym_fh, &sym_attr);
1292 nfs_end_data_update(dir);
1293 if (!error) {
1294 error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
1295 } else {
1296 if (error == -EEXIST)
1297 printk("nfs_proc_symlink: %s/%s already exists??\n",
1298 dentry->d_parent->d_name.name, dentry->d_name.name);
1299 d_drop(dentry);
1300 }
1301 unlock_kernel();
1302 return error;
1303}
1304
1305static int
1306nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1307{
1308 struct inode *inode = old_dentry->d_inode;
1309 int error;
1310
1311 dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
1312 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1313 dentry->d_parent->d_name.name, dentry->d_name.name);
1314
1315 /*
1316 * Drop the dentry in advance to force a new lookup.
1317 * Since nfs_proc_link doesn't return a file handle,
1318 * we can't use the existing dentry.
1319 */
1320 lock_kernel();
1321 d_drop(dentry);
1322
1323 nfs_begin_data_update(dir);
1324 nfs_begin_data_update(inode);
1325 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1326 nfs_end_data_update(inode);
1327 nfs_end_data_update(dir);
1328 unlock_kernel();
1329 return error;
1330}
1331
1332/*
1333 * RENAME
1334 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
1335 * different file handle for the same inode after a rename (e.g. when
1336 * moving to a different directory). A fail-safe method to do so would
1337 * be to look up old_dir/old_name, create a link to new_dir/new_name and
1338 * rename the old file using the sillyrename stuff. This way, the original
1339 * file in old_dir will go away when the last process iput()s the inode.
1340 *
1341 * FIXED.
1342 *
1343 * It actually works quite well. One needs to have the possibility for
1344 * at least one ".nfs..." file in each directory the file ever gets
1345 * moved or linked to which happens automagically with the new
1346 * implementation that only depends on the dcache stuff instead of
1347 * using the inode layer
1348 *
1349 * Unfortunately, things are a little more complicated than indicated
1350 * above. For a cross-directory move, we want to make sure we can get
1351 * rid of the old inode after the operation. This means there must be
1352 * no pending writes (if it's a file), and the use count must be 1.
1353 * If these conditions are met, we can drop the dentries before doing
1354 * the rename.
1355 */
1356static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1357 struct inode *new_dir, struct dentry *new_dentry)
1358{
1359 struct inode *old_inode = old_dentry->d_inode;
1360 struct inode *new_inode = new_dentry->d_inode;
1361 struct dentry *dentry = NULL, *rehash = NULL;
1362 int error = -EBUSY;
1363
1364 /*
1365 * To prevent any new references to the target during the rename,
1366 * we unhash the dentry and free the inode in advance.
1367 */
1368 lock_kernel();
1369 if (!d_unhashed(new_dentry)) {
1370 d_drop(new_dentry);
1371 rehash = new_dentry;
1372 }
1373
1374 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1375 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1376 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1377 atomic_read(&new_dentry->d_count));
1378
1379 /*
1380 * First check whether the target is busy ... we can't
1381 * safely do _any_ rename if the target is in use.
1382 *
1383 * For files, make a copy of the dentry and then do a
1384 * silly-rename. If the silly-rename succeeds, the
1385 * copied dentry is hashed and becomes the new target.
1386 */
1387 if (!new_inode)
1388 goto go_ahead;
1389 if (S_ISDIR(new_inode->i_mode))
1390 goto out;
1391 else if (atomic_read(&new_dentry->d_count) > 2) {
1392 int err;
1393 /* copy the target dentry's name */
1394 dentry = d_alloc(new_dentry->d_parent,
1395 &new_dentry->d_name);
1396 if (!dentry)
1397 goto out;
1398
1399 /* silly-rename the existing target ... */
1400 err = nfs_sillyrename(new_dir, new_dentry);
1401 if (!err) {
1402 new_dentry = rehash = dentry;
1403 new_inode = NULL;
1404 /* instantiate the replacement target */
1405 d_instantiate(new_dentry, NULL);
1406 } else if (atomic_read(&new_dentry->d_count) > 1) {
1407 /* dentry still busy? */
1408#ifdef NFS_PARANOIA
1409 printk("nfs_rename: target %s/%s busy, d_count=%d\n",
1410 new_dentry->d_parent->d_name.name,
1411 new_dentry->d_name.name,
1412 atomic_read(&new_dentry->d_count));
1413#endif
1414 goto out;
1415 }
1416 }
1417
1418go_ahead:
1419 /*
1420 * ... prune child dentries and writebacks if needed.
1421 */
1422 if (atomic_read(&old_dentry->d_count) > 1) {
1423 nfs_wb_all(old_inode);
1424 shrink_dcache_parent(old_dentry);
1425 }
1426
1427 if (new_inode)
1428 d_delete(new_dentry);
1429
1430 nfs_begin_data_update(old_dir);
1431 nfs_begin_data_update(new_dir);
1432 nfs_begin_data_update(old_inode);
1433 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
1434 new_dir, &new_dentry->d_name);
1435 nfs_end_data_update(old_inode);
1436 nfs_end_data_update(new_dir);
1437 nfs_end_data_update(old_dir);
1438out:
1439 if (rehash)
1440 d_rehash(rehash);
1441 if (!error) {
1442 if (!S_ISDIR(old_inode->i_mode))
1443 d_move(old_dentry, new_dentry);
1444 nfs_renew_times(new_dentry);
1445 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
1446 }
1447
1448 /* new dentry created? */
1449 if (dentry)
1450 dput(dentry);
1451 unlock_kernel();
1452 return error;
1453}
1454
1455int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
1456{
1457 struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
1458
1459 if (cache->cred != cred
1460 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
1461 || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
1462 return -ENOENT;
1463 memcpy(res, cache, sizeof(*res));
1464 return 0;
1465}
1466
1467void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1468{
1469 struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
1470
1471 if (cache->cred != set->cred) {
1472 if (cache->cred)
1473 put_rpccred(cache->cred);
1474 cache->cred = get_rpccred(set->cred);
1475 }
1476 NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
1477 cache->jiffies = set->jiffies;
1478 cache->mask = set->mask;
1479}
1480
1481static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
1482{
1483 struct nfs_access_entry cache;
1484 int status;
1485
1486 status = nfs_access_get_cached(inode, cred, &cache);
1487 if (status == 0)
1488 goto out;
1489
1490 /* Be clever: ask server to check for all possible rights */
1491 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
1492 cache.cred = cred;
1493 cache.jiffies = jiffies;
1494 status = NFS_PROTO(inode)->access(inode, &cache);
1495 if (status != 0)
1496 return status;
1497 nfs_access_add_cache(inode, &cache);
1498out:
1499 if ((cache.mask & mask) == mask)
1500 return 0;
1501 return -EACCES;
1502}
1503
1504int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1505{
1506 struct rpc_cred *cred;
1507 int res = 0;
1508
1509 if (mask == 0)
1510 goto out;
1511 /* Is this sys_access() ? */
1512 if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
1513 goto force_lookup;
1514
1515 switch (inode->i_mode & S_IFMT) {
1516 case S_IFLNK:
1517 goto out;
1518 case S_IFREG:
1519 /* NFSv4 has atomic_open... */
1520 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
1521 && nd != NULL
1522 && (nd->flags & LOOKUP_OPEN))
1523 goto out;
1524 break;
1525 case S_IFDIR:
1526 /*
1527 * Optimize away all write operations, since the server
1528 * will check permissions when we perform the op.
1529 */
1530 if ((mask & MAY_WRITE) && !(mask & MAY_READ))
1531 goto out;
1532 }
1533
1534force_lookup:
1535 lock_kernel();
1536
1537 if (!NFS_PROTO(inode)->access)
1538 goto out_notsup;
1539
1540 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
1541 if (!IS_ERR(cred)) {
1542 res = nfs_do_access(inode, cred, mask);
1543 put_rpccred(cred);
1544 } else
1545 res = PTR_ERR(cred);
1546 unlock_kernel();
1547out:
1548 return res;
1549out_notsup:
1550 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
1551 if (res == 0)
1552 res = generic_permission(inode, mask, NULL);
1553 unlock_kernel();
1554 return res;
1555}
1556
1557/*
1558 * Local variables:
1559 * version-control: t
1560 * kept-new-versions: 5
1561 * End:
1562 */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
new file mode 100644
index 000000000000..68df803f27ca
--- /dev/null
+++ b/fs/nfs/direct.c
@@ -0,0 +1,808 @@
1/*
2 * linux/fs/nfs/direct.c
3 *
4 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
5 *
6 * High-performance uncached I/O for the Linux NFS client
7 *
8 * There are important applications whose performance or correctness
9 * depends on uncached access to file data. Database clusters
10 * (multiple copies of the same instance running on separate hosts)
11 * implement their own cache coherency protocol that subsumes file
12 * system cache protocols. Applications that process datasets
13 * considerably larger than the client's memory do not always benefit
14 * from a local cache. A streaming video server, for instance, has no
15 * need to cache the contents of a file.
16 *
17 * When an application requests uncached I/O, all read and write requests
18 * are made directly to the server; data stored or fetched via these
19 * requests is not cached in the Linux page cache. The client does not
20 * correct unaligned requests from applications. All requested bytes are
21 * held on permanent storage before a direct write system call returns to
22 * an application.
23 *
24 * Solaris implements an uncached I/O facility called directio() that
25 * is used for backups and sequential I/O to very large files. Solaris
26 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
27 * an undocumented mount option.
28 *
29 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
30 * help from Andrew Morton.
31 *
32 * 18 Dec 2001 Initial implementation for 2.4 --cel
33 * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
34 * 08 Jun 2003 Port to 2.5 APIs --cel
35 * 31 Mar 2004 Handle direct I/O without VFS support --cel
36 * 15 Sep 2004 Parallel async reads --cel
37 *
38 */
39
40#include <linux/config.h>
41#include <linux/errno.h>
42#include <linux/sched.h>
43#include <linux/kernel.h>
44#include <linux/smp_lock.h>
45#include <linux/file.h>
46#include <linux/pagemap.h>
47#include <linux/kref.h>
48
49#include <linux/nfs_fs.h>
50#include <linux/nfs_page.h>
51#include <linux/sunrpc/clnt.h>
52
53#include <asm/system.h>
54#include <asm/uaccess.h>
55#include <asm/atomic.h>
56
57#define NFSDBG_FACILITY NFSDBG_VFS
58#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
59
60static kmem_cache_t *nfs_direct_cachep;
61
62/*
63 * This represents a set of asynchronous requests that we're waiting on
64 */
65struct nfs_direct_req {
66 struct kref kref; /* release manager */
67 struct list_head list; /* nfs_read_data structs */
68 wait_queue_head_t wait; /* wait for i/o completion */
69 struct page ** pages; /* pages in our buffer */
70 unsigned int npages; /* count of pages */
71 atomic_t complete, /* i/os we're waiting for */
72 count, /* bytes actually processed */
73 error; /* any reported error */
74};
75
76
77/**
78 * nfs_get_user_pages - find and set up pages underlying user's buffer
79 * rw: direction (read or write)
80 * user_addr: starting address of this segment of user's buffer
81 * count: size of this segment
82 * @pages: returned array of page struct pointers underlying user's buffer
83 */
84static inline int
85nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
86 struct page ***pages)
87{
88 int result = -ENOMEM;
89 unsigned long page_count;
90 size_t array_size;
91
92 /* set an arbitrary limit to prevent type overflow */
93 /* XXX: this can probably be as large as INT_MAX */
94 if (size > MAX_DIRECTIO_SIZE) {
95 *pages = NULL;
96 return -EFBIG;
97 }
98
99 page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
100 page_count -= user_addr >> PAGE_SHIFT;
101
102 array_size = (page_count * sizeof(struct page *));
103 *pages = kmalloc(array_size, GFP_KERNEL);
104 if (*pages) {
105 down_read(&current->mm->mmap_sem);
106 result = get_user_pages(current, current->mm, user_addr,
107 page_count, (rw == READ), 0,
108 *pages, NULL);
109 up_read(&current->mm->mmap_sem);
110 }
111 return result;
112}
113
/**
 * nfs_free_user_pages - tear down page struct array
 * @pages: array of page struct pointers underlying target buffer
 * @npages: number of pages in the array
 * @do_dirty: dirty the pages as we release them
 *
 * Drops one reference on each of the first @npages entries, optionally
 * marking each page dirty first, then frees the array itself.
 */
static void
nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
{
	int idx = 0;

	while (idx < npages) {
		struct page *page = pages[idx++];

		if (do_dirty)
			set_page_dirty_lock(page);
		page_cache_release(page);
	}
	kfree(pages);
}
131
132/**
133 * nfs_direct_req_release - release nfs_direct_req structure for direct read
134 * @kref: kref object embedded in an nfs_direct_req structure
135 *
136 */
137static void nfs_direct_req_release(struct kref *kref)
138{
139 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
140 kmem_cache_free(nfs_direct_cachep, dreq);
141}
142
143/**
144 * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
145 * @count: count of bytes for the read request
146 * @rsize: local rsize setting
147 *
148 * Note we also set the number of requests we have in the dreq when we are
149 * done. This prevents races with I/O completion so we will always wait
150 * until all requests have been dispatched and completed.
151 */
152static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
153{
154 struct list_head *list;
155 struct nfs_direct_req *dreq;
156 unsigned int reads = 0;
157
158 dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
159 if (!dreq)
160 return NULL;
161
162 kref_init(&dreq->kref);
163 init_waitqueue_head(&dreq->wait);
164 INIT_LIST_HEAD(&dreq->list);
165 atomic_set(&dreq->count, 0);
166 atomic_set(&dreq->error, 0);
167
168 list = &dreq->list;
169 for(;;) {
170 struct nfs_read_data *data = nfs_readdata_alloc();
171
172 if (unlikely(!data)) {
173 while (!list_empty(list)) {
174 data = list_entry(list->next,
175 struct nfs_read_data, pages);
176 list_del(&data->pages);
177 nfs_readdata_free(data);
178 }
179 kref_put(&dreq->kref, nfs_direct_req_release);
180 return NULL;
181 }
182
183 INIT_LIST_HEAD(&data->pages);
184 list_add(&data->pages, list);
185
186 data->req = (struct nfs_page *) dreq;
187 reads++;
188 if (nbytes <= rsize)
189 break;
190 nbytes -= rsize;
191 }
192 kref_get(&dreq->kref);
193 atomic_set(&dreq->complete, reads);
194 return dreq;
195}
196
197/**
198 * nfs_direct_read_result - handle a read reply for a direct read request
199 * @data: address of NFS READ operation control block
200 * @status: status of this NFS READ operation
201 *
202 * We must hold a reference to all the pages in this direct read request
203 * until the RPCs complete. This could be long *after* we are woken up in
204 * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
205 */
206static void nfs_direct_read_result(struct nfs_read_data *data, int status)
207{
208 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
209
210 if (likely(status >= 0))
211 atomic_add(data->res.count, &dreq->count);
212 else
213 atomic_set(&dreq->error, status);
214
215 if (unlikely(atomic_dec_and_test(&dreq->complete))) {
216 nfs_free_user_pages(dreq->pages, dreq->npages, 1);
217 wake_up(&dreq->wait);
218 kref_put(&dreq->kref, nfs_direct_req_release);
219 }
220}
221
222/**
223 * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
224 * @dreq: address of nfs_direct_req struct for this request
225 * @inode: target inode
226 * @ctx: target file open context
227 * @user_addr: starting address of this segment of user's buffer
228 * @count: size of this segment
229 * @file_offset: offset in file to begin the operation
230 *
231 * For each nfs_read_data struct that was allocated on the list, dispatch
232 * an NFS READ operation
233 */
234static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
235 struct inode *inode, struct nfs_open_context *ctx,
236 unsigned long user_addr, size_t count, loff_t file_offset)
237{
238 struct list_head *list = &dreq->list;
239 struct page **pages = dreq->pages;
240 unsigned int curpage, pgbase;
241 unsigned int rsize = NFS_SERVER(inode)->rsize;
242
243 curpage = 0;
244 pgbase = user_addr & ~PAGE_MASK;
245 do {
246 struct nfs_read_data *data;
247 unsigned int bytes;
248
249 bytes = rsize;
250 if (count < rsize)
251 bytes = count;
252
253 data = list_entry(list->next, struct nfs_read_data, pages);
254 list_del_init(&data->pages);
255
256 data->inode = inode;
257 data->cred = ctx->cred;
258 data->args.fh = NFS_FH(inode);
259 data->args.context = ctx;
260 data->args.offset = file_offset;
261 data->args.pgbase = pgbase;
262 data->args.pages = &pages[curpage];
263 data->args.count = bytes;
264 data->res.fattr = &data->fattr;
265 data->res.eof = 0;
266 data->res.count = bytes;
267
268 NFS_PROTO(inode)->read_setup(data);
269
270 data->task.tk_cookie = (unsigned long) inode;
271 data->task.tk_calldata = data;
272 data->task.tk_release = nfs_readdata_release;
273 data->complete = nfs_direct_read_result;
274
275 lock_kernel();
276 rpc_execute(&data->task);
277 unlock_kernel();
278
279 dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
280 data->task.tk_pid,
281 inode->i_sb->s_id,
282 (long long)NFS_FILEID(inode),
283 bytes,
284 (unsigned long long)data->args.offset);
285
286 file_offset += bytes;
287 pgbase += bytes;
288 curpage += pgbase >> PAGE_SHIFT;
289 pgbase &= ~PAGE_MASK;
290
291 count -= bytes;
292 } while (count != 0);
293}
294
295/**
296 * nfs_direct_read_wait - wait for I/O completion for direct reads
297 * @dreq: request on which we are to wait
298 * @intr: whether or not this wait can be interrupted
299 *
300 * Collects and returns the final error value/byte-count.
301 */
302static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
303{
304 int result = 0;
305
306 if (intr) {
307 result = wait_event_interruptible(dreq->wait,
308 (atomic_read(&dreq->complete) == 0));
309 } else {
310 wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
311 }
312
313 if (!result)
314 result = atomic_read(&dreq->error);
315 if (!result)
316 result = atomic_read(&dreq->count);
317
318 kref_put(&dreq->kref, nfs_direct_req_release);
319 return (ssize_t) result;
320}
321
322/**
323 * nfs_direct_read_seg - Read in one iov segment. Generate separate
324 * read RPCs for each "rsize" bytes.
325 * @inode: target inode
326 * @ctx: target file open context
327 * @user_addr: starting address of this segment of user's buffer
328 * @count: size of this segment
329 * @file_offset: offset in file to begin the operation
330 * @pages: array of addresses of page structs defining user's buffer
331 * @nr_pages: number of pages in the array
332 *
333 */
334static ssize_t nfs_direct_read_seg(struct inode *inode,
335 struct nfs_open_context *ctx, unsigned long user_addr,
336 size_t count, loff_t file_offset, struct page **pages,
337 unsigned int nr_pages)
338{
339 ssize_t result;
340 sigset_t oldset;
341 struct rpc_clnt *clnt = NFS_CLIENT(inode);
342 struct nfs_direct_req *dreq;
343
344 dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
345 if (!dreq)
346 return -ENOMEM;
347
348 dreq->pages = pages;
349 dreq->npages = nr_pages;
350
351 rpc_clnt_sigmask(clnt, &oldset);
352 nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
353 file_offset);
354 result = nfs_direct_read_wait(dreq, clnt->cl_intr);
355 rpc_clnt_sigunmask(clnt, &oldset);
356
357 return result;
358}
359
360/**
361 * nfs_direct_read - For each iov segment, map the user's buffer
362 * then generate read RPCs.
363 * @inode: target inode
364 * @ctx: target file open context
365 * @iov: array of vectors that define I/O buffer
366 * file_offset: offset in file to begin the operation
367 * nr_segs: size of iovec array
368 *
369 * We've already pushed out any non-direct writes so that this read
370 * will see them when we read from the server.
371 */
372static ssize_t
373nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
374 const struct iovec *iov, loff_t file_offset,
375 unsigned long nr_segs)
376{
377 ssize_t tot_bytes = 0;
378 unsigned long seg = 0;
379
380 while ((seg < nr_segs) && (tot_bytes >= 0)) {
381 ssize_t result;
382 int page_count;
383 struct page **pages;
384 const struct iovec *vec = &iov[seg++];
385 unsigned long user_addr = (unsigned long) vec->iov_base;
386 size_t size = vec->iov_len;
387
388 page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
389 if (page_count < 0) {
390 nfs_free_user_pages(pages, 0, 0);
391 if (tot_bytes > 0)
392 break;
393 return page_count;
394 }
395
396 result = nfs_direct_read_seg(inode, ctx, user_addr, size,
397 file_offset, pages, page_count);
398
399 if (result <= 0) {
400 if (tot_bytes > 0)
401 break;
402 return result;
403 }
404 tot_bytes += result;
405 file_offset += result;
406 if (result < size)
407 break;
408 }
409
410 return tot_bytes;
411}
412
413/**
414 * nfs_direct_write_seg - Write out one iov segment. Generate separate
415 * write RPCs for each "wsize" bytes, then commit.
416 * @inode: target inode
417 * @ctx: target file open context
418 * user_addr: starting address of this segment of user's buffer
419 * count: size of this segment
420 * file_offset: offset in file to begin the operation
421 * @pages: array of addresses of page structs defining user's buffer
422 * nr_pages: size of pages array
423 */
424static ssize_t nfs_direct_write_seg(struct inode *inode,
425 struct nfs_open_context *ctx, unsigned long user_addr,
426 size_t count, loff_t file_offset, struct page **pages,
427 int nr_pages)
428{
429 const unsigned int wsize = NFS_SERVER(inode)->wsize;
430 size_t request;
431 int curpage, need_commit;
432 ssize_t result, tot_bytes;
433 struct nfs_writeverf first_verf;
434 struct nfs_write_data *wdata;
435
436 wdata = nfs_writedata_alloc();
437 if (!wdata)
438 return -ENOMEM;
439
440 wdata->inode = inode;
441 wdata->cred = ctx->cred;
442 wdata->args.fh = NFS_FH(inode);
443 wdata->args.context = ctx;
444 wdata->args.stable = NFS_UNSTABLE;
445 if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
446 wdata->args.stable = NFS_FILE_SYNC;
447 wdata->res.fattr = &wdata->fattr;
448 wdata->res.verf = &wdata->verf;
449
450 nfs_begin_data_update(inode);
451retry:
452 need_commit = 0;
453 tot_bytes = 0;
454 curpage = 0;
455 request = count;
456 wdata->args.pgbase = user_addr & ~PAGE_MASK;
457 wdata->args.offset = file_offset;
458 do {
459 wdata->args.count = request;
460 if (wdata->args.count > wsize)
461 wdata->args.count = wsize;
462 wdata->args.pages = &pages[curpage];
463
464 dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
465 wdata->args.count, (long long) wdata->args.offset,
466 user_addr + tot_bytes, wdata->args.pgbase, curpage);
467
468 lock_kernel();
469 result = NFS_PROTO(inode)->write(wdata);
470 unlock_kernel();
471
472 if (result <= 0) {
473 if (tot_bytes > 0)
474 break;
475 goto out;
476 }
477
478 if (tot_bytes == 0)
479 memcpy(&first_verf.verifier, &wdata->verf.verifier,
480 sizeof(first_verf.verifier));
481 if (wdata->verf.committed != NFS_FILE_SYNC) {
482 need_commit = 1;
483 if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
484 sizeof(first_verf.verifier)));
485 goto sync_retry;
486 }
487
488 tot_bytes += result;
489
490 /* in case of a short write: stop now, let the app recover */
491 if (result < wdata->args.count)
492 break;
493
494 wdata->args.offset += result;
495 wdata->args.pgbase += result;
496 curpage += wdata->args.pgbase >> PAGE_SHIFT;
497 wdata->args.pgbase &= ~PAGE_MASK;
498 request -= result;
499 } while (request != 0);
500
501 /*
502 * Commit data written so far, even in the event of an error
503 */
504 if (need_commit) {
505 wdata->args.count = tot_bytes;
506 wdata->args.offset = file_offset;
507
508 lock_kernel();
509 result = NFS_PROTO(inode)->commit(wdata);
510 unlock_kernel();
511
512 if (result < 0 || memcmp(&first_verf.verifier,
513 &wdata->verf.verifier,
514 sizeof(first_verf.verifier)) != 0)
515 goto sync_retry;
516 }
517 result = tot_bytes;
518
519out:
520 nfs_end_data_update_defer(inode);
521 nfs_writedata_free(wdata);
522 return result;
523
524sync_retry:
525 wdata->args.stable = NFS_FILE_SYNC;
526 goto retry;
527}
528
529/**
530 * nfs_direct_write - For each iov segment, map the user's buffer
531 * then generate write and commit RPCs.
532 * @inode: target inode
533 * @ctx: target file open context
534 * @iov: array of vectors that define I/O buffer
535 * file_offset: offset in file to begin the operation
536 * nr_segs: size of iovec array
537 *
538 * Upon return, generic_file_direct_IO invalidates any cached pages
539 * that non-direct readers might access, so they will pick up these
540 * writes immediately.
541 */
542static ssize_t nfs_direct_write(struct inode *inode,
543 struct nfs_open_context *ctx, const struct iovec *iov,
544 loff_t file_offset, unsigned long nr_segs)
545{
546 ssize_t tot_bytes = 0;
547 unsigned long seg = 0;
548
549 while ((seg < nr_segs) && (tot_bytes >= 0)) {
550 ssize_t result;
551 int page_count;
552 struct page **pages;
553 const struct iovec *vec = &iov[seg++];
554 unsigned long user_addr = (unsigned long) vec->iov_base;
555 size_t size = vec->iov_len;
556
557 page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
558 if (page_count < 0) {
559 nfs_free_user_pages(pages, 0, 0);
560 if (tot_bytes > 0)
561 break;
562 return page_count;
563 }
564
565 result = nfs_direct_write_seg(inode, ctx, user_addr, size,
566 file_offset, pages, page_count);
567 nfs_free_user_pages(pages, page_count, 0);
568
569 if (result <= 0) {
570 if (tot_bytes > 0)
571 break;
572 return result;
573 }
574 tot_bytes += result;
575 file_offset += result;
576 if (result < size)
577 break;
578 }
579 return tot_bytes;
580}
581
582/**
583 * nfs_direct_IO - NFS address space operation for direct I/O
584 * rw: direction (read or write)
585 * @iocb: target I/O control block
586 * @iov: array of vectors that define I/O buffer
587 * file_offset: offset in file to begin the operation
588 * nr_segs: size of iovec array
589 *
590 */
591ssize_t
592nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
593 loff_t file_offset, unsigned long nr_segs)
594{
595 ssize_t result = -EINVAL;
596 struct file *file = iocb->ki_filp;
597 struct nfs_open_context *ctx;
598 struct dentry *dentry = file->f_dentry;
599 struct inode *inode = dentry->d_inode;
600
601 /*
602 * No support for async yet
603 */
604 if (!is_sync_kiocb(iocb))
605 return result;
606
607 ctx = (struct nfs_open_context *)file->private_data;
608 switch (rw) {
609 case READ:
610 dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
611 dentry->d_name.name, file_offset, nr_segs);
612
613 result = nfs_direct_read(inode, ctx, iov,
614 file_offset, nr_segs);
615 break;
616 case WRITE:
617 dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
618 dentry->d_name.name, file_offset, nr_segs);
619
620 result = nfs_direct_write(inode, ctx, iov,
621 file_offset, nr_segs);
622 break;
623 default:
624 break;
625 }
626 return result;
627}
628
629/**
630 * nfs_file_direct_read - file direct read operation for NFS files
631 * @iocb: target I/O control block
632 * @buf: user's buffer into which to read data
633 * count: number of bytes to read
634 * pos: byte offset in file where reading starts
635 *
636 * We use this function for direct reads instead of calling
637 * generic_file_aio_read() in order to avoid gfar's check to see if
638 * the request starts before the end of the file. For that check
639 * to work, we must generate a GETATTR before each direct read, and
640 * even then there is a window between the GETATTR and the subsequent
641 * READ where the file size could change. So our preference is simply
642 * to do all reads the application wants, and the server will take
643 * care of managing the end of file boundary.
644 *
645 * This function also eliminates unnecessarily updating the file's
646 * atime locally, as the NFS server sets the file's atime, and this
647 * client must read the updated atime from the server back into its
648 * cache.
649 */
650ssize_t
651nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
652{
653 ssize_t retval = -EINVAL;
654 loff_t *ppos = &iocb->ki_pos;
655 struct file *file = iocb->ki_filp;
656 struct nfs_open_context *ctx =
657 (struct nfs_open_context *) file->private_data;
658 struct dentry *dentry = file->f_dentry;
659 struct address_space *mapping = file->f_mapping;
660 struct inode *inode = mapping->host;
661 struct iovec iov = {
662 .iov_base = buf,
663 .iov_len = count,
664 };
665
666 dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
667 dentry->d_parent->d_name.name, dentry->d_name.name,
668 (unsigned long) count, (unsigned long) pos);
669
670 if (!is_sync_kiocb(iocb))
671 goto out;
672 if (count < 0)
673 goto out;
674 retval = -EFAULT;
675 if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
676 goto out;
677 retval = 0;
678 if (!count)
679 goto out;
680
681 if (mapping->nrpages) {
682 retval = filemap_fdatawrite(mapping);
683 if (retval == 0)
684 retval = nfs_wb_all(inode);
685 if (retval == 0)
686 retval = filemap_fdatawait(mapping);
687 if (retval)
688 goto out;
689 }
690
691 retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
692 if (retval > 0)
693 *ppos = pos + retval;
694
695out:
696 return retval;
697}
698
699/**
700 * nfs_file_direct_write - file direct write operation for NFS files
701 * @iocb: target I/O control block
702 * @buf: user's buffer from which to write data
703 * count: number of bytes to write
704 * pos: byte offset in file where writing starts
705 *
706 * We use this function for direct writes instead of calling
707 * generic_file_aio_write() in order to avoid taking the inode
708 * semaphore and updating the i_size. The NFS server will set
709 * the new i_size and this client must read the updated size
710 * back into its cache. We let the server do generic write
711 * parameter checking and report problems.
712 *
713 * We also avoid an unnecessary invocation of generic_osync_inode(),
714 * as it is fairly meaningless to sync the metadata of an NFS file.
715 *
716 * We eliminate local atime updates, see direct read above.
717 *
718 * We avoid unnecessary page cache invalidations for normal cached
719 * readers of this file.
720 *
721 * Note that O_APPEND is not supported for NFS direct writes, as there
722 * is no atomic O_APPEND write facility in the NFS protocol.
723 */
724ssize_t
725nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
726{
727 ssize_t retval = -EINVAL;
728 loff_t *ppos = &iocb->ki_pos;
729 unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
730 struct file *file = iocb->ki_filp;
731 struct nfs_open_context *ctx =
732 (struct nfs_open_context *) file->private_data;
733 struct dentry *dentry = file->f_dentry;
734 struct address_space *mapping = file->f_mapping;
735 struct inode *inode = mapping->host;
736 struct iovec iov = {
737 .iov_base = (char __user *)buf,
738 .iov_len = count,
739 };
740
741 dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
742 dentry->d_parent->d_name.name, dentry->d_name.name,
743 inode->i_ino, (unsigned long) count, (unsigned long) pos);
744
745 if (!is_sync_kiocb(iocb))
746 goto out;
747 if (count < 0)
748 goto out;
749 if (pos < 0)
750 goto out;
751 retval = -EFAULT;
752 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
753 goto out;
754 if (file->f_error) {
755 retval = file->f_error;
756 file->f_error = 0;
757 goto out;
758 }
759 retval = -EFBIG;
760 if (limit != RLIM_INFINITY) {
761 if (pos >= limit) {
762 send_sig(SIGXFSZ, current, 0);
763 goto out;
764 }
765 if (count > limit - (unsigned long) pos)
766 count = limit - (unsigned long) pos;
767 }
768 retval = 0;
769 if (!count)
770 goto out;
771
772 if (mapping->nrpages) {
773 retval = filemap_fdatawrite(mapping);
774 if (retval == 0)
775 retval = nfs_wb_all(inode);
776 if (retval == 0)
777 retval = filemap_fdatawait(mapping);
778 if (retval)
779 goto out;
780 }
781
782 retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
783 if (mapping->nrpages)
784 invalidate_inode_pages2(mapping);
785 if (retval > 0)
786 *ppos = pos + retval;
787
788out:
789 return retval;
790}
791
792int nfs_init_directcache(void)
793{
794 nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
795 sizeof(struct nfs_direct_req),
796 0, SLAB_RECLAIM_ACCOUNT,
797 NULL, NULL);
798 if (nfs_direct_cachep == NULL)
799 return -ENOMEM;
800
801 return 0;
802}
803
804void nfs_destroy_directcache(void)
805{
806 if (kmem_cache_destroy(nfs_direct_cachep))
807 printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
808}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
new file mode 100644
index 000000000000..f06eee6dcff5
--- /dev/null
+++ b/fs/nfs/file.c
@@ -0,0 +1,484 @@
1/*
2 * linux/fs/nfs/file.c
3 *
4 * Copyright (C) 1992 Rick Sladkey
5 *
6 * Changes Copyright (C) 1994 by Florian La Roche
7 * - Do not copy data too often around in the kernel.
8 * - In nfs_file_read the return value of kmalloc wasn't checked.
9 * - Put in a better version of read look-ahead buffering. Original idea
10 * and implementation by Wai S Kok elekokws@ee.nus.sg.
11 *
12 * Expire cache on write to a file by Wai S Kok (Oct 1994).
13 *
14 * Total rewrite of read side for new NFS buffer cache.. Linus.
15 *
16 * nfs regular file handling functions
17 */
18
19#include <linux/time.h>
20#include <linux/kernel.h>
21#include <linux/errno.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/nfs_fs.h>
25#include <linux/nfs_mount.h>
26#include <linux/mm.h>
27#include <linux/slab.h>
28#include <linux/pagemap.h>
29#include <linux/smp_lock.h>
30
31#include <asm/uaccess.h>
32#include <asm/system.h>
33
34#include "delegation.h"
35
36#define NFSDBG_FACILITY NFSDBG_FILE
37
38static int nfs_file_open(struct inode *, struct file *);
39static int nfs_file_release(struct inode *, struct file *);
40static int nfs_file_mmap(struct file *, struct vm_area_struct *);
41static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
42static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
43static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
44static int nfs_file_flush(struct file *);
45static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
46static int nfs_check_flags(int flags);
47static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
48static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
49
/*
 * File operations for NFS regular files.  The synchronous read/write
 * entry points go through the do_sync_* helpers, while the aio entry
 * points are the NFS-specific handlers defined in this file.
 */
struct file_operations nfs_file_operations = {
	.llseek		= remote_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read		= nfs_file_read,
	.aio_write		= nfs_file_write,
	.mmap		= nfs_file_mmap,
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.fsync		= nfs_fsync,
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.sendfile	= nfs_file_sendfile,
	.check_flags	= nfs_check_flags,
};
66
/* Inode operations for NFS regular files */
struct inode_operations nfs_file_inode_operations = {
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
};
72
/*
 * Hack for future NFS swap support: if no IS_SWAPFILE() is already
 * defined, provide one that always evaluates to false.
 */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)	(0)
#endif
77
/*
 * Validate open flags: the combination O_APPEND | O_DIRECT is
 * rejected with -EINVAL, everything else is accepted (returns 0).
 */
static int nfs_check_flags(int flags)
{
	const int rejected = O_APPEND | O_DIRECT;

	return ((flags & rejected) == rejected) ? -EINVAL : 0;
}
85
86/*
87 * Open file
88 */
89static int
90nfs_file_open(struct inode *inode, struct file *filp)
91{
92 struct nfs_server *server = NFS_SERVER(inode);
93 int (*open)(struct inode *, struct file *);
94 int res;
95
96 res = nfs_check_flags(filp->f_flags);
97 if (res)
98 return res;
99
100 lock_kernel();
101 /* Do NFSv4 open() call */
102 if ((open = server->rpc_ops->file_open) != NULL)
103 res = open(inode, filp);
104 unlock_kernel();
105 return res;
106}
107
108static int
109nfs_file_release(struct inode *inode, struct file *filp)
110{
111 /* Ensure that dirty pages are flushed out with the right creds */
112 if (filp->f_mode & FMODE_WRITE)
113 filemap_fdatawrite(filp->f_mapping);
114 return NFS_PROTO(inode)->file_release(inode, filp);
115}
116
117/*
118 * Flush all dirty pages, and check for write errors.
119 *
120 */
121static int
122nfs_file_flush(struct file *file)
123{
124 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
125 struct inode *inode = file->f_dentry->d_inode;
126 int status;
127
128 dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
129
130 if ((file->f_mode & FMODE_WRITE) == 0)
131 return 0;
132 lock_kernel();
133 /* Ensure that data+attribute caches are up to date after close() */
134 status = nfs_wb_all(inode);
135 if (!status) {
136 status = ctx->error;
137 ctx->error = 0;
138 if (!status && !nfs_have_delegation(inode, FMODE_READ))
139 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
140 }
141 unlock_kernel();
142 return status;
143}
144
145static ssize_t
146nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
147{
148 struct dentry * dentry = iocb->ki_filp->f_dentry;
149 struct inode * inode = dentry->d_inode;
150 ssize_t result;
151
152#ifdef CONFIG_NFS_DIRECTIO
153 if (iocb->ki_filp->f_flags & O_DIRECT)
154 return nfs_file_direct_read(iocb, buf, count, pos);
155#endif
156
157 dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
158 dentry->d_parent->d_name.name, dentry->d_name.name,
159 (unsigned long) count, (unsigned long) pos);
160
161 result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
162 if (!result)
163 result = generic_file_aio_read(iocb, buf, count, pos);
164 return result;
165}
166
167static ssize_t
168nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
169 read_actor_t actor, void *target)
170{
171 struct dentry *dentry = filp->f_dentry;
172 struct inode *inode = dentry->d_inode;
173 ssize_t res;
174
175 dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
176 dentry->d_parent->d_name.name, dentry->d_name.name,
177 (unsigned long) count, (unsigned long long) *ppos);
178
179 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
180 if (!res)
181 res = generic_file_sendfile(filp, ppos, count, actor, target);
182 return res;
183}
184
185static int
186nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
187{
188 struct dentry *dentry = file->f_dentry;
189 struct inode *inode = dentry->d_inode;
190 int status;
191
192 dfprintk(VFS, "nfs: mmap(%s/%s)\n",
193 dentry->d_parent->d_name.name, dentry->d_name.name);
194
195 status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
196 if (!status)
197 status = generic_file_mmap(file, vma);
198 return status;
199}
200
201/*
202 * Flush any dirty pages for this process, and check for write errors.
203 * The return status from this call provides a reliable indication of
204 * whether any write errors occurred for this process.
205 */
206static int
207nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
208{
209 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
210 struct inode *inode = dentry->d_inode;
211 int status;
212
213 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
214
215 lock_kernel();
216 status = nfs_wb_all(inode);
217 if (!status) {
218 status = ctx->error;
219 ctx->error = 0;
220 }
221 unlock_kernel();
222 return status;
223}
224
/*
 * This does the "real" work of the write. The generic routine has
 * allocated the page, locked it, done all the page alignment stuff
 * calculations etc. Now we should just copy the data from user
 * space and write it back to the real medium..
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 *
 * prepare_write itself only calls nfs_flush_incompatible() for the
 * page and returns its result; offset and to are unused.
 */
static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	int err = nfs_flush_incompatible(file, page);

	return err;
}
238
/*
 * Commit the byte range [offset, to) of a page by passing it to
 * nfs_updatepage() under the big kernel lock.
 */
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	unsigned nbytes = to - offset;
	long ret;

	lock_kernel();
	ret = nfs_updatepage(file, page, offset, nbytes);
	unlock_kernel();

	return ret;
}
248
/*
 * Address space operations for NFS regular files.  The direct_IO
 * hook is only present when CONFIG_NFS_DIRECTIO is enabled.
 */
struct address_space_operations nfs_file_aops = {
	.readpage = nfs_readpage,
	.readpages = nfs_readpages,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.writepage = nfs_writepage,
	.writepages = nfs_writepages,
	.prepare_write = nfs_prepare_write,
	.commit_write = nfs_commit_write,
#ifdef CONFIG_NFS_DIRECTIO
	.direct_IO = nfs_direct_IO,
#endif
};
261
262/*
263 * Write to a file (through the page cache).
264 */
265static ssize_t
266nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
267{
268 struct dentry * dentry = iocb->ki_filp->f_dentry;
269 struct inode * inode = dentry->d_inode;
270 ssize_t result;
271
272#ifdef CONFIG_NFS_DIRECTIO
273 if (iocb->ki_filp->f_flags & O_DIRECT)
274 return nfs_file_direct_write(iocb, buf, count, pos);
275#endif
276
277 dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
278 dentry->d_parent->d_name.name, dentry->d_name.name,
279 inode->i_ino, (unsigned long) count, (unsigned long) pos);
280
281 result = -EBUSY;
282 if (IS_SWAPFILE(inode))
283 goto out_swapfile;
284 result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
285 if (result)
286 goto out;
287
288 result = count;
289 if (!count)
290 goto out;
291
292 result = generic_file_aio_write(iocb, buf, count, pos);
293out:
294 return result;
295
296out_swapfile:
297 printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
298 goto out;
299}
300
301static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
302{
303 struct inode *inode = filp->f_mapping->host;
304 int status = 0;
305
306 lock_kernel();
307 /* Use local locking if mounted with "-onolock" */
308 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
309 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
310 else {
311 struct file_lock *cfl = posix_test_lock(filp, fl);
312
313 fl->fl_type = F_UNLCK;
314 if (cfl != NULL)
315 memcpy(fl, cfl, sizeof(*fl));
316 }
317 unlock_kernel();
318 return status;
319}
320
321static int do_vfs_lock(struct file *file, struct file_lock *fl)
322{
323 int res = 0;
324 switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
325 case FL_POSIX:
326 res = posix_lock_file_wait(file, fl);
327 break;
328 case FL_FLOCK:
329 res = flock_lock_file_wait(file, fl);
330 break;
331 default:
332 BUG();
333 }
334 if (res < 0)
335 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
336 __FUNCTION__);
337 return res;
338}
339
340static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
341{
342 struct inode *inode = filp->f_mapping->host;
343 sigset_t oldset;
344 int status;
345
346 rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
347 /*
348 * Flush all pending writes before doing anything
349 * with locks..
350 */
351 filemap_fdatawrite(filp->f_mapping);
352 down(&inode->i_sem);
353 nfs_wb_all(inode);
354 up(&inode->i_sem);
355 filemap_fdatawait(filp->f_mapping);
356
357 /* NOTE: special case
358 * If we're signalled while cleaning up locks on process exit, we
359 * still need to complete the unlock.
360 */
361 lock_kernel();
362 /* Use local locking if mounted with "-onolock" */
363 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
364 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
365 else
366 status = do_vfs_lock(filp, fl);
367 unlock_kernel();
368 rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
369 return status;
370}
371
/*
 * Acquire a lock.
 *
 * Pending writes are flushed first and a negative flush status aborts
 * the request.  The lock is then taken through the protocol's lock
 * operation or, on "-onolock" mounts, in the local VFS lock tables.
 * After a successful lock the data is flushed again and the inode's
 * caches are zapped.  The whole operation runs with the RPC client's
 * signal mask applied.  Returns the lock status.
 */
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	sigset_t oldset;
	int status;

	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	status = filemap_fdatawrite(filp->f_mapping);
	if (status == 0) {
		down(&inode->i_sem);
		status = nfs_wb_all(inode);
		up(&inode->i_sem);
		if (status == 0)
			status = filemap_fdatawait(filp->f_mapping);
	}
	/* Only negative values abort here */
	if (status < 0)
		goto out;

	lock_kernel();
	/* Use local locking if mounted with "-onolock" */
	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
		/* If we were signalled we still need to ensure that
		 * we clean up any state on the server. We therefore
		 * record the lock call as having succeeded in order to
		 * ensure that locks_remove_posix() cleans it out when
		 * the process exits.
		 */
		if (status == -EINTR || status == -ERESTARTSYS)
			do_vfs_lock(filp, fl);
	} else
		status = do_vfs_lock(filp, fl);
	unlock_kernel();
	if (status < 0)
		goto out;
	/*
	 * Make sure we clear the cache whenever we try to get the lock.
	 * This makes locking act as a cache coherency point.
	 */
	/* Results of this second flush are deliberately not checked */
	filemap_fdatawrite(filp->f_mapping);
	down(&inode->i_sem);
	nfs_wb_all(inode);	/* we may have slept */
	up(&inode->i_sem);
	filemap_fdatawait(filp->f_mapping);
	nfs_zap_caches(inode);
out:
	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
	return status;
}
425
426/*
427 * Lock a (portion of) a file
428 */
429static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
430{
431 struct inode * inode = filp->f_mapping->host;
432
433 dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
434 inode->i_sb->s_id, inode->i_ino,
435 fl->fl_type, fl->fl_flags,
436 (long long)fl->fl_start, (long long)fl->fl_end);
437
438 if (!inode)
439 return -EINVAL;
440
441 /* No mandatory locks over NFS */
442 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
443 return -ENOLCK;
444
445 if (IS_GETLK(cmd))
446 return do_getlk(filp, cmd, fl);
447 if (fl->fl_type == F_UNLCK)
448 return do_unlk(filp, cmd, fl);
449 return do_setlk(filp, cmd, fl);
450}
451
452/*
453 * Lock a (portion of) a file
454 */
455static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
456{
457 struct inode * inode = filp->f_mapping->host;
458
459 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
460 inode->i_sb->s_id, inode->i_ino,
461 fl->fl_type, fl->fl_flags);
462
463 if (!inode)
464 return -EINVAL;
465
466 /*
467 * No BSD flocks over NFS allowed.
468 * Note: we could try to fake a POSIX lock request here by
469 * using ((u32) filp | 0x80000000) or some such as the pid.
470 * Not sure whether that would be unique, though, or whether
471 * that would break in other places.
472 */
473 if (!(fl->fl_flags & FL_FLOCK))
474 return -ENOLCK;
475
476 /* We're simulating flock() locks using posix locks on the server */
477 fl->fl_owner = (fl_owner_t)filp;
478 fl->fl_start = 0;
479 fl->fl_end = OFFSET_MAX;
480
481 if (fl->fl_type == F_UNLCK)
482 return do_unlk(filp, cmd, fl);
483 return do_setlk(filp, cmd, fl);
484}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
new file mode 100644
index 000000000000..b74c4e3a64e2
--- /dev/null
+++ b/fs/nfs/idmap.c
@@ -0,0 +1,498 @@
1/*
2 * fs/nfs/idmap.c
3 *
4 * UID and GID to name mapping for clients.
5 *
6 * Copyright (c) 2002 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Marius Aamodt Eriksen <marius@umich.edu>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/types.h>
40#include <linux/slab.h>
41#include <linux/socket.h>
42#include <linux/in.h>
43#include <linux/sched.h>
44
45#include <linux/sunrpc/clnt.h>
46#include <linux/workqueue.h>
47#include <linux/sunrpc/rpc_pipe_fs.h>
48
49#include <linux/nfs_fs_sb.h>
50#include <linux/nfs_fs.h>
51
52#include <linux/nfs_idmap.h>
53
54#define IDMAP_HASH_SZ 128
55
/* One cached name <-> id mapping */
struct idmap_hashent {
	__u32 ih_id;			/* numeric id */
	int ih_namelen;			/* length of ih_name; 0 is treated as empty by idmap_lookup_id() */
	char ih_name[IDMAP_NAMESZ];	/* name, NUL-terminated by idmap_update_entry() */
};
61
/* Fixed-size mapping cache with a single entry per hash bucket */
struct idmap_hashtable {
	__u8 h_type;	/* IDMAP_TYPE_USER or IDMAP_TYPE_GROUP */
	struct idmap_hashent h_entries[IDMAP_HASH_SZ];
};
66
/* Per-client id mapper state: upcall pipe, in-flight message and caches */
struct idmap {
	char idmap_path[48];			/* pipe path, "<client pathname>/idmap" */
	struct dentry *idmap_dentry;		/* dentry returned by rpc_mkpipe() */
	wait_queue_head_t idmap_wq;		/* upcallers sleep here awaiting a reply */
	struct idmap_msg idmap_im;		/* the single in-flight upcall message */
	struct semaphore idmap_lock;		/* Serializes upcalls */
	struct semaphore idmap_im_lock;		/* Protects the hashtable */
	struct idmap_hashtable idmap_user_hash;		/* user mappings */
	struct idmap_hashtable idmap_group_hash;	/* group mappings */
};
77
78static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
79 char __user *, size_t);
80static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
81 size_t);
82void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
83
84static unsigned int fnvhash32(const void *, size_t);
85
/* Pipe operations handed to rpc_mkpipe() for the idmap pipe */
static struct rpc_pipe_ops idmap_upcall_ops = {
	.upcall = idmap_pipe_upcall,
	.downcall = idmap_pipe_downcall,
	.destroy_msg = idmap_pipe_destroy_msg,
};
91
92void
93nfs_idmap_new(struct nfs4_client *clp)
94{
95 struct idmap *idmap;
96
97 if (clp->cl_idmap != NULL)
98 return;
99 if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
100 return;
101
102 memset(idmap, 0, sizeof(*idmap));
103
104 snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
105 "%s/idmap", clp->cl_rpcclient->cl_pathname);
106
107 idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
108 idmap, &idmap_upcall_ops, 0);
109 if (IS_ERR(idmap->idmap_dentry)) {
110 kfree(idmap);
111 return;
112 }
113
114 init_MUTEX(&idmap->idmap_lock);
115 init_MUTEX(&idmap->idmap_im_lock);
116 init_waitqueue_head(&idmap->idmap_wq);
117 idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
118 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
119
120 clp->cl_idmap = idmap;
121}
122
123void
124nfs_idmap_delete(struct nfs4_client *clp)
125{
126 struct idmap *idmap = clp->cl_idmap;
127
128 if (!idmap)
129 return;
130 rpc_unlink(idmap->idmap_path);
131 clp->cl_idmap = NULL;
132 kfree(idmap);
133}
134
135/*
136 * Helper routines for manipulating the hashtable
137 */
138static inline struct idmap_hashent *
139idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
140{
141 return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
142}
143
144static struct idmap_hashent *
145idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
146{
147 struct idmap_hashent *he = idmap_name_hash(h, name, len);
148
149 if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
150 return NULL;
151 return he;
152}
153
154static inline struct idmap_hashent *
155idmap_id_hash(struct idmap_hashtable* h, __u32 id)
156{
157 return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
158}
159
160static struct idmap_hashent *
161idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
162{
163 struct idmap_hashent *he = idmap_id_hash(h, id);
164 if (he->ih_id != id || he->ih_namelen == 0)
165 return NULL;
166 return he;
167}
168
/*
 * Routines for allocating new entries in the hashtable.
 * For now, we just have 1 entry per bucket, so it's all
 * pretty trivial.
 */

/* Pick the entry that will hold @name: simply the bucket it hashes to */
static inline struct idmap_hashent *
idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
{
	struct idmap_hashent *slot = idmap_name_hash(h, name, len);

	return slot;
}
179
180static inline struct idmap_hashent *
181idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
182{
183 return idmap_id_hash(h, id);
184}
185
186static void
187idmap_update_entry(struct idmap_hashent *he, const char *name,
188 size_t namelen, __u32 id)
189{
190 he->ih_id = id;
191 memcpy(he->ih_name, name, namelen);
192 he->ih_name[namelen] = '\0';
193 he->ih_namelen = namelen;
194}
195
/*
 * Name -> ID
 *
 * Translate the first @namelen bytes of @name into a numeric id using
 * hashtable @h, consulting the cache first and otherwise queueing an
 * upcall on the idmap pipe and sleeping until woken.
 *
 * Returns 0 and stores the id in *@id on success, -EINVAL for an empty
 * or over-long name, and -EIO if the upcall could not be queued or did
 * not report success.
 */
static int
nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
		const char *name, size_t namelen, __u32 *id)
{
	struct rpc_pipe_msg msg;
	struct idmap_msg *im;
	struct idmap_hashent *he;
	DECLARE_WAITQUEUE(wq, current);
	int ret = -EIO;

	im = &idmap->idmap_im;

	/*
	 * String sanity checks
	 * Note that the userland daemon expects NUL terminated strings
	 */
	/* Strip trailing NUL bytes; a name made only of NULs is invalid */
	for (;;) {
		if (namelen == 0)
			return -EINVAL;
		if (name[namelen-1] != '\0')
			break;
		namelen--;
	}
	/* Leave room for the terminating NUL in a IDMAP_NAMESZ buffer */
	if (namelen >= IDMAP_NAMESZ)
		return -EINVAL;

	down(&idmap->idmap_lock);
	down(&idmap->idmap_im_lock);

	/* Fast path: answer from the cache */
	he = idmap_lookup_name(h, name, namelen);
	if (he != NULL) {
		*id = he->ih_id;
		ret = 0;
		goto out;
	}

	/* Build the upcall; the memset leaves im_name NUL-terminated */
	memset(im, 0, sizeof(*im));
	memcpy(im->im_name, name, namelen);

	im->im_type = h->h_type;
	im->im_conv = IDMAP_CONV_NAMETOID;

	memset(&msg, 0, sizeof(msg));
	msg.data = im;
	msg.len = sizeof(*im);

	/* Join the wait queue before the message is queued */
	add_wait_queue(&idmap->idmap_wq, &wq);
	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
		remove_wait_queue(&idmap->idmap_wq, &wq);
		goto out;
	}

	/*
	 * Sleep until woken.  idmap_im_lock is released while sleeping;
	 * the downcall and destroy_msg handlers take it before they
	 * touch idmap_im.
	 */
	set_current_state(TASK_UNINTERRUPTIBLE);
	up(&idmap->idmap_im_lock);
	schedule();
	current->state = TASK_RUNNING;
	remove_wait_queue(&idmap->idmap_wq, &wq);
	down(&idmap->idmap_im_lock);

	if (im->im_status & IDMAP_STATUS_SUCCESS) {
		*id = im->im_id;
		ret = 0;
	}

 out:
	/* Clear the shared message so it cannot match a later downcall */
	memset(im, 0, sizeof(*im));
	up(&idmap->idmap_im_lock);
	up(&idmap->idmap_lock);
	return (ret);
}
269
/*
 * ID -> Name
 *
 * Translate numeric @id into a name using hashtable @h, consulting the
 * cache first and otherwise queueing an upcall on the idmap pipe and
 * sleeping until woken.
 *
 * On success the name is copied into @name and its length is returned.
 * Returns -EIO if the upcall could not be queued, did not report
 * success, or produced an empty name.
 *
 * NOTE(review): the copy into @name is not NUL-terminated and no
 * buffer size is passed in; callers presumably provide a large enough
 * buffer -- confirm against callers.
 */
static int
nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
		__u32 id, char *name)
{
	struct rpc_pipe_msg msg;
	struct idmap_msg *im;
	struct idmap_hashent *he;
	DECLARE_WAITQUEUE(wq, current);
	int ret = -EIO;
	unsigned int len;

	im = &idmap->idmap_im;

	down(&idmap->idmap_lock);
	down(&idmap->idmap_im_lock);

	/* Fast path: answer from the cache */
	he = idmap_lookup_id(h, id);
	if (he != 0) {
		memcpy(name, he->ih_name, he->ih_namelen);
		ret = he->ih_namelen;
		goto out;
	}

	/* Build the upcall message */
	memset(im, 0, sizeof(*im));
	im->im_type = h->h_type;
	im->im_conv = IDMAP_CONV_IDTONAME;
	im->im_id = id;

	memset(&msg, 0, sizeof(msg));
	msg.data = im;
	msg.len = sizeof(*im);

	/* Join the wait queue before the message is queued */
	add_wait_queue(&idmap->idmap_wq, &wq);

	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
		remove_wait_queue(&idmap->idmap_wq, &wq);
		goto out;
	}

	/*
	 * Sleep until woken.  idmap_im_lock is released while sleeping;
	 * the downcall and destroy_msg handlers take it before they
	 * touch idmap_im.
	 */
	set_current_state(TASK_UNINTERRUPTIBLE);
	up(&idmap->idmap_im_lock);
	schedule();
	current->state = TASK_RUNNING;
	remove_wait_queue(&idmap->idmap_wq, &wq);
	down(&idmap->idmap_im_lock);

	if (im->im_status & IDMAP_STATUS_SUCCESS) {
		/* An empty name is treated as a failure (ret stays -EIO) */
		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
			goto out;
		memcpy(name, im->im_name, len);
		ret = len;
	}

 out:
	/* Clear the shared message so it cannot match a later downcall */
	memset(im, 0, sizeof(*im));
	up(&idmap->idmap_im_lock);
	up(&idmap->idmap_lock);
	return ret;
}
332
333/* RPC pipefs upcall/downcall routines */
334static ssize_t
335idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
336 char __user *dst, size_t buflen)
337{
338 char *data = (char *)msg->data + msg->copied;
339 ssize_t mlen = msg->len - msg->copied;
340 ssize_t left;
341
342 if (mlen > buflen)
343 mlen = buflen;
344
345 left = copy_to_user(dst, data, mlen);
346 if (left < 0) {
347 msg->errno = left;
348 return left;
349 }
350 mlen -= left;
351 msg->copied += mlen;
352 msg->errno = 0;
353 return mlen;
354}
355
/*
 * Handle a reply written to the idmap pipe.
 *
 * The write must be exactly one struct idmap_msg.  If the reply
 * matches the upcall currently held in idmap->idmap_im, the result is
 * copied into it and the waiter is woken.  A reply that passes the
 * sanity checks is also stored in the user or group cache, whether or
 * not it matched the current upcall.
 *
 * Returns @mlen on success (including a reply whose status is not
 * IDMAP_STATUS_SUCCESS), -ENOSPC for a wrong-sized write, -EFAULT if
 * the copy from user space fails, and -EINVAL for an empty or
 * unterminated name or an unknown type or conversion.
 */
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
	struct idmap *idmap = (struct idmap *)rpci->private;
	struct idmap_msg im_in, *im = &idmap->idmap_im;
	struct idmap_hashtable *h;
	struct idmap_hashent *he = NULL;
	int namelen_in;
	int ret;

	if (mlen != sizeof(im_in))
		return (-ENOSPC);

	if (copy_from_user(&im_in, src, mlen) != 0)
		return (-EFAULT);

	down(&idmap->idmap_im_lock);

	ret = mlen;
	/* The status is copied into the shared message unconditionally */
	im->im_status = im_in.im_status;
	/* If we got an error, terminate now, and wake up pending upcalls */
	if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
		wake_up(&idmap->idmap_wq);
		goto out;
	}

	/* Sanity checking of strings */
	ret = -EINVAL;
	namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
	/* Reject empty names and names with no NUL inside the buffer */
	if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
		goto out;

	/* Select the cache the reply belongs to */
	switch (im_in.im_type) {
		case IDMAP_TYPE_USER:
			h = &idmap->idmap_user_hash;
			break;
		case IDMAP_TYPE_GROUP:
			h = &idmap->idmap_group_hash;
			break;
		default:
			goto out;
	}

	switch (im_in.im_conv) {
	case IDMAP_CONV_IDTONAME:
		/* Did we match the current upcall? */
		if (im->im_conv == IDMAP_CONV_IDTONAME
				&& im->im_type == im_in.im_type
				&& im->im_id == im_in.im_id) {
			/* Yes: copy string, including the terminating '\0'  */
			memcpy(im->im_name, im_in.im_name, namelen_in);
			im->im_name[namelen_in] = '\0';
			wake_up(&idmap->idmap_wq);
		}
		he = idmap_alloc_id(h, im_in.im_id);
		break;
	case IDMAP_CONV_NAMETOID:
		/* Did we match the current upcall? */
		if (im->im_conv == IDMAP_CONV_NAMETOID
				&& im->im_type == im_in.im_type
				&& strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
				&& memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
			im->im_id = im_in.im_id;
			wake_up(&idmap->idmap_wq);
		}
		he = idmap_alloc_name(h, im_in.im_name, namelen_in);
		break;
	default:
		goto out;
	}

	/* If the entry is valid, also copy it to the cache */
	if (he != NULL)
		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
	ret = mlen;
out:
	up(&idmap->idmap_im_lock);
	return ret;
}
436
437void
438idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
439{
440 struct idmap_msg *im = msg->data;
441 struct idmap *idmap = container_of(im, struct idmap, idmap_im);
442
443 if (msg->errno >= 0)
444 return;
445 down(&idmap->idmap_im_lock);
446 im->im_status = IDMAP_STATUS_LOOKUPFAIL;
447 wake_up(&idmap->idmap_wq);
448 up(&idmap->idmap_im_lock);
449}
450
/*
 * Fowler/Noll/Vo hash
 *    http://www.isthe.com/chongo/tech/comp/fnv/
 */

#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */

/*
 * Hash @buflen bytes at @buf: starting from FNV_1_32, each byte is
 * folded in by multiplying by FNV_P_32 and then XOR-ing the byte.
 * An empty buffer hashes to FNV_1_32.
 */
static unsigned int fnvhash32(const void *buf, size_t buflen)
{
	const unsigned char *bytes = buf;
	unsigned int hash = FNV_1_32;
	size_t i;

	for (i = 0; i < buflen; i++)
		hash = (hash * FNV_P_32) ^ (unsigned int)bytes[i];

	return hash;
}
471
472int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
473{
474 struct idmap *idmap = clp->cl_idmap;
475
476 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
477}
478
479int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
480{
481 struct idmap *idmap = clp->cl_idmap;
482
483 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
484}
485
486int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
487{
488 struct idmap *idmap = clp->cl_idmap;
489
490 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
491}
492int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
493{
494 struct idmap *idmap = clp->cl_idmap;
495
496 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
497}
498
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
new file mode 100644
index 000000000000..6345f26e87ee
--- /dev/null
+++ b/fs/nfs/inode.c
@@ -0,0 +1,2003 @@
1/*
2 * linux/fs/nfs/inode.c
3 *
4 * Copyright (C) 1992 Rick Sladkey
5 *
6 * nfs inode and superblock handling functions
7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
12 * J.S.Peatfield@damtp.cam.ac.uk
13 *
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/init.h>
19
20#include <linux/time.h>
21#include <linux/kernel.h>
22#include <linux/mm.h>
23#include <linux/string.h>
24#include <linux/stat.h>
25#include <linux/errno.h>
26#include <linux/unistd.h>
27#include <linux/sunrpc/clnt.h>
28#include <linux/sunrpc/stats.h>
29#include <linux/nfs_fs.h>
30#include <linux/nfs_mount.h>
31#include <linux/nfs4_mount.h>
32#include <linux/lockd/bind.h>
33#include <linux/smp_lock.h>
34#include <linux/seq_file.h>
35#include <linux/mount.h>
36#include <linux/nfs_idmap.h>
37#include <linux/vfs.h>
38
39#include <asm/system.h>
40#include <asm/uaccess.h>
41
42#include "delegation.h"
43
44#define NFSDBG_FACILITY NFSDBG_VFS
45#define NFS_PARANOIA 1
46
47/* Maximum number of readahead requests
48 * FIXME: this should really be a sysctl so that users may tune it to suit
49 * their needs. People that do NFS over a slow network, might for
50 * instance want to reduce it to something closer to 1 for improved
51 * interactive response.
52 */
53#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
54
55static void nfs_invalidate_inode(struct inode *);
56static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long);
57
58static struct inode *nfs_alloc_inode(struct super_block *sb);
59static void nfs_destroy_inode(struct inode *);
60static int nfs_write_inode(struct inode *,int);
61static void nfs_delete_inode(struct inode *);
62static void nfs_clear_inode(struct inode *);
63static void nfs_umount_begin(struct super_block *);
64static int nfs_statfs(struct super_block *, struct kstatfs *);
65static int nfs_show_options(struct seq_file *, struct vfsmount *);
66
67static struct rpc_program nfs_program;
68
69static struct super_operations nfs_sops = {
70 .alloc_inode = nfs_alloc_inode,
71 .destroy_inode = nfs_destroy_inode,
72 .write_inode = nfs_write_inode,
73 .delete_inode = nfs_delete_inode,
74 .statfs = nfs_statfs,
75 .clear_inode = nfs_clear_inode,
76 .umount_begin = nfs_umount_begin,
77 .show_options = nfs_show_options,
78};
79
80/*
81 * RPC cruft for NFS
82 */
83static struct rpc_stat nfs_rpcstat = {
84 .program = &nfs_program
85};
86static struct rpc_version * nfs_version[] = {
87 NULL,
88 NULL,
89 &nfs_version2,
90#if defined(CONFIG_NFS_V3)
91 &nfs_version3,
92#elif defined(CONFIG_NFS_V4)
93 NULL,
94#endif
95#if defined(CONFIG_NFS_V4)
96 &nfs_version4,
97#endif
98};
99
100static struct rpc_program nfs_program = {
101 .name = "nfs",
102 .number = NFS_PROGRAM,
103 .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]),
104 .version = nfs_version,
105 .stats = &nfs_rpcstat,
106 .pipe_dir_name = "/nfs",
107};
108
109static inline unsigned long
110nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
111{
112 return nfs_fileid_to_ino_t(fattr->fileid);
113}
114
115static int
116nfs_write_inode(struct inode *inode, int sync)
117{
118 int flags = sync ? FLUSH_WAIT : 0;
119 int ret;
120
121 ret = nfs_commit_inode(inode, 0, 0, flags);
122 if (ret < 0)
123 return ret;
124 return 0;
125}
126
127static void
128nfs_delete_inode(struct inode * inode)
129{
130 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
131
132 nfs_wb_all(inode);
133 /*
134 * The following should never happen...
135 */
136 if (nfs_have_writebacks(inode)) {
137 printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
138 }
139
140 clear_inode(inode);
141}
142
143/*
144 * For the moment, the only task for the NFS clear_inode method is to
145 * release the mmap credential
146 */
147static void
148nfs_clear_inode(struct inode *inode)
149{
150 struct nfs_inode *nfsi = NFS_I(inode);
151 struct rpc_cred *cred;
152
153 nfs_wb_all(inode);
154 BUG_ON (!list_empty(&nfsi->open_files));
155 cred = nfsi->cache_access.cred;
156 if (cred)
157 put_rpccred(cred);
158 BUG_ON(atomic_read(&nfsi->data_updates) != 0);
159}
160
161void
162nfs_umount_begin(struct super_block *sb)
163{
164 struct nfs_server *server = NFS_SB(sb);
165 struct rpc_clnt *rpc;
166
167 /* -EIO all pending I/O */
168 if ((rpc = server->client) != NULL)
169 rpc_killall_tasks(rpc);
170}
171
172
/*
 * Round @bsize down to a power of two and optionally report its log2.
 *
 * @bsize:   requested block size
 * @nrbitsp: if non-NULL, receives the bit position of the returned size
 *
 * A power-of-two @bsize is returned unchanged when @nrbitsp is NULL.
 * Otherwise the result is the largest power of two that does not exceed
 * @bsize, capped at 2^31 (the bit count is an unsigned char and the scan
 * starts at bit 31).  A @bsize of 0 yields 1 with a bit count of 0.
 *
 * All arithmetic is done in unsigned long.  The previous "1 << nrbits"
 * shifted into the sign bit of an int for nrbits == 31, which is undefined
 * and, where long is 64 bits wide, produced a sign-extended block size.
 */
static inline unsigned long
nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
{
	/* make sure blocksize is a power of two */
	if ((bsize & (bsize - 1)) || nrbitsp) {
		unsigned char	nrbits;

		/* Find the highest bit position (at most 31) at or below the top set bit */
		for (nrbits = 31; nrbits && !(bsize >> nrbits); nrbits--)
			;
		bsize = 1UL << nrbits;
		if (nrbitsp)
			*nrbitsp = nrbits;
	}

	return bsize;
}
189
190/*
191 * Calculate the number of 512byte blocks used.
192 */
193static inline unsigned long
194nfs_calc_block_size(u64 tsize)
195{
196 loff_t used = (tsize + 511) >> 9;
197 return (used > ULONG_MAX) ? ULONG_MAX : used;
198}
199
200/*
201 * Compute and set NFS server blocksize
202 */
203static inline unsigned long
204nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
205{
206 if (bsize < 1024)
207 bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
208 else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
209 bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
210
211 return nfs_block_bits(bsize, nrbitsp);
212}
213
214/*
215 * Obtain the root inode of the file system.
216 */
217static struct inode *
218nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
219{
220 struct nfs_server *server = NFS_SB(sb);
221 struct inode *rooti;
222 int error;
223
224 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
225 if (error < 0) {
226 dprintk("nfs_get_root: getattr error = %d\n", -error);
227 return ERR_PTR(error);
228 }
229
230 rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
231 if (!rooti)
232 return ERR_PTR(-ENOMEM);
233 return rooti;
234}
235
236/*
237 * Do NFS version-independent mount processing, and sanity checking
238 */
239static int
240nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
241{
242 struct nfs_server *server;
243 struct inode *root_inode;
244 struct nfs_fattr fattr;
245 struct nfs_fsinfo fsinfo = {
246 .fattr = &fattr,
247 };
248 struct nfs_pathconf pathinfo = {
249 .fattr = &fattr,
250 };
251 int no_root_error = 0;
252 unsigned long max_rpc_payload;
253
254 /* We probably want something more informative here */
255 snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
256
257 server = NFS_SB(sb);
258
259 sb->s_magic = NFS_SUPER_MAGIC;
260
261 root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
262 /* Did getting the root inode fail? */
263 if (IS_ERR(root_inode)) {
264 no_root_error = PTR_ERR(root_inode);
265 goto out_no_root;
266 }
267 sb->s_root = d_alloc_root(root_inode);
268 if (!sb->s_root) {
269 no_root_error = -ENOMEM;
270 goto out_no_root;
271 }
272 sb->s_root->d_op = server->rpc_ops->dentry_ops;
273
274 /* Get some general file system info */
275 if (server->namelen == 0 &&
276 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
277 server->namelen = pathinfo.max_namelen;
278 /* Work out a lot of parameters */
279 if (server->rsize == 0)
280 server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
281 if (server->wsize == 0)
282 server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
283
284 if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
285 server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
286 if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
287 server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
288
289 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
290 if (server->rsize > max_rpc_payload)
291 server->rsize = max_rpc_payload;
292 if (server->wsize > max_rpc_payload)
293 server->wsize = max_rpc_payload;
294
295 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
296 if (server->rpages > NFS_READ_MAXIOV) {
297 server->rpages = NFS_READ_MAXIOV;
298 server->rsize = server->rpages << PAGE_CACHE_SHIFT;
299 }
300
301 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
302 if (server->wpages > NFS_WRITE_MAXIOV) {
303 server->wpages = NFS_WRITE_MAXIOV;
304 server->wsize = server->wpages << PAGE_CACHE_SHIFT;
305 }
306
307 if (sb->s_blocksize == 0)
308 sb->s_blocksize = nfs_block_bits(server->wsize,
309 &sb->s_blocksize_bits);
310 server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
311
312 server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
313 if (server->dtsize > PAGE_CACHE_SIZE)
314 server->dtsize = PAGE_CACHE_SIZE;
315 if (server->dtsize > server->rsize)
316 server->dtsize = server->rsize;
317
318 if (server->flags & NFS_MOUNT_NOAC) {
319 server->acregmin = server->acregmax = 0;
320 server->acdirmin = server->acdirmax = 0;
321 sb->s_flags |= MS_SYNCHRONOUS;
322 }
323 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
324
325 sb->s_maxbytes = fsinfo.maxfilesize;
326 if (sb->s_maxbytes > MAX_LFS_FILESIZE)
327 sb->s_maxbytes = MAX_LFS_FILESIZE;
328
329 server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
330 server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
331
332 /* We're airborne Set socket buffersize */
333 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
334 return 0;
335 /* Yargs. It didn't work out. */
336out_no_root:
337 dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
338 if (!IS_ERR(root_inode))
339 iput(root_inode);
340 return no_root_error;
341}
342
343/*
344 * Create an RPC client handle.
345 */
346static struct rpc_clnt *
347nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
348{
349 struct rpc_timeout timeparms;
350 struct rpc_xprt *xprt = NULL;
351 struct rpc_clnt *clnt = NULL;
352 int tcp = (data->flags & NFS_MOUNT_TCP);
353
354 /* Initialize timeout values */
355 timeparms.to_initval = data->timeo * HZ / 10;
356 timeparms.to_retries = data->retrans;
357 timeparms.to_maxval = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
358 timeparms.to_exponential = 1;
359
360 if (!timeparms.to_initval)
361 timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
362 if (!timeparms.to_retries)
363 timeparms.to_retries = 5;
364
365 /* create transport and client */
366 xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
367 &server->addr, &timeparms);
368 if (IS_ERR(xprt)) {
369 printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
370 return (struct rpc_clnt *)xprt;
371 }
372 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
373 server->rpc_ops->version, data->pseudoflavor);
374 if (IS_ERR(clnt)) {
375 printk(KERN_WARNING "NFS: cannot create RPC client.\n");
376 goto out_fail;
377 }
378
379 clnt->cl_intr = 1;
380 clnt->cl_softrtry = 1;
381 clnt->cl_chatty = 1;
382
383 return clnt;
384
385out_fail:
386 xprt_destroy(xprt);
387 return clnt;
388}
389
390/*
391 * The way this works is that the mount process passes a structure
392 * in the data argument which contains the server's IP address
393 * and the root file handle obtained from the server's mount
394 * daemon. We stash these away in the private superblock fields.
395 */
396static int
397nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
398{
399 struct nfs_server *server;
400 rpc_authflavor_t authflavor;
401
402 server = NFS_SB(sb);
403 sb->s_blocksize_bits = 0;
404 sb->s_blocksize = 0;
405 if (data->bsize)
406 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
407 if (data->rsize)
408 server->rsize = nfs_block_size(data->rsize, NULL);
409 if (data->wsize)
410 server->wsize = nfs_block_size(data->wsize, NULL);
411 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
412
413 server->acregmin = data->acregmin*HZ;
414 server->acregmax = data->acregmax*HZ;
415 server->acdirmin = data->acdirmin*HZ;
416 server->acdirmax = data->acdirmax*HZ;
417
418 /* Start lockd here, before we might error out */
419 if (!(server->flags & NFS_MOUNT_NONLM))
420 lockd_up();
421
422 server->namelen = data->namlen;
423 server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
424 if (!server->hostname)
425 return -ENOMEM;
426 strcpy(server->hostname, data->hostname);
427
428 /* Check NFS protocol revision and initialize RPC op vector
429 * and file handle pool. */
430 if (server->flags & NFS_MOUNT_VER3) {
431#ifdef CONFIG_NFS_V3
432 server->rpc_ops = &nfs_v3_clientops;
433 server->caps |= NFS_CAP_READDIRPLUS;
434 if (data->version < 4) {
435 printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
436 return -EIO;
437 }
438#else
439 printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
440 return -EIO;
441#endif
442 } else {
443 server->rpc_ops = &nfs_v2_clientops;
444 }
445
446 /* Fill in pseudoflavor for mount version < 5 */
447 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
448 data->pseudoflavor = RPC_AUTH_UNIX;
449 authflavor = data->pseudoflavor; /* save for sb_init() */
450 /* XXX maybe we want to add a server->pseudoflavor field */
451
452 /* Create RPC client handles */
453 server->client = nfs_create_client(server, data);
454 if (IS_ERR(server->client))
455 return PTR_ERR(server->client);
456 /* RFC 2623, sec 2.3.2 */
457 if (authflavor != RPC_AUTH_UNIX) {
458 server->client_sys = rpc_clone_client(server->client);
459 if (IS_ERR(server->client_sys))
460 return PTR_ERR(server->client_sys);
461 if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
462 return -ENOMEM;
463 } else {
464 atomic_inc(&server->client->cl_count);
465 server->client_sys = server->client;
466 }
467
468 if (server->flags & NFS_MOUNT_VER3) {
469 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
470 server->namelen = NFS3_MAXNAMLEN;
471 sb->s_time_gran = 1;
472 } else {
473 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
474 server->namelen = NFS2_MAXNAMLEN;
475 }
476
477 sb->s_op = &nfs_sops;
478 return nfs_sb_init(sb, authflavor);
479}
480
481static int
482nfs_statfs(struct super_block *sb, struct kstatfs *buf)
483{
484 struct nfs_server *server = NFS_SB(sb);
485 unsigned char blockbits;
486 unsigned long blockres;
487 struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
488 struct nfs_fattr fattr;
489 struct nfs_fsstat res = {
490 .fattr = &fattr,
491 };
492 int error;
493
494 lock_kernel();
495
496 error = server->rpc_ops->statfs(server, rootfh, &res);
497 buf->f_type = NFS_SUPER_MAGIC;
498 if (error < 0)
499 goto out_err;
500
501 /*
502 * Current versions of glibc do not correctly handle the
503 * case where f_frsize != f_bsize. Eventually we want to
504 * report the value of wtmult in this field.
505 */
506 buf->f_frsize = sb->s_blocksize;
507
508 /*
509 * On most *nix systems, f_blocks, f_bfree, and f_bavail
510 * are reported in units of f_frsize. Linux hasn't had
511 * an f_frsize field in its statfs struct until recently,
512 * thus historically Linux's sys_statfs reports these
513 * fields in units of f_bsize.
514 */
515 buf->f_bsize = sb->s_blocksize;
516 blockbits = sb->s_blocksize_bits;
517 blockres = (1 << blockbits) - 1;
518 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
519 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
520 buf->f_bavail = (res.abytes + blockres) >> blockbits;
521
522 buf->f_files = res.tfiles;
523 buf->f_ffree = res.afiles;
524
525 buf->f_namelen = server->namelen;
526 out:
527 unlock_kernel();
528
529 return 0;
530
531 out_err:
532 printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error);
533 buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
534 goto out;
535
536}
537
538static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
539{
540 static struct proc_nfs_info {
541 int flag;
542 char *str;
543 char *nostr;
544 } nfs_info[] = {
545 { NFS_MOUNT_SOFT, ",soft", ",hard" },
546 { NFS_MOUNT_INTR, ",intr", "" },
547 { NFS_MOUNT_POSIX, ",posix", "" },
548 { NFS_MOUNT_TCP, ",tcp", ",udp" },
549 { NFS_MOUNT_NOCTO, ",nocto", "" },
550 { NFS_MOUNT_NOAC, ",noac", "" },
551 { NFS_MOUNT_NONLM, ",nolock", ",lock" },
552 { 0, NULL, NULL }
553 };
554 struct proc_nfs_info *nfs_infop;
555 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
556
557 seq_printf(m, ",v%d", nfss->rpc_ops->version);
558 seq_printf(m, ",rsize=%d", nfss->rsize);
559 seq_printf(m, ",wsize=%d", nfss->wsize);
560 if (nfss->acregmin != 3*HZ)
561 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
562 if (nfss->acregmax != 60*HZ)
563 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
564 if (nfss->acdirmin != 30*HZ)
565 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
566 if (nfss->acdirmax != 60*HZ)
567 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
568 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
569 if (nfss->flags & nfs_infop->flag)
570 seq_puts(m, nfs_infop->str);
571 else
572 seq_puts(m, nfs_infop->nostr);
573 }
574 seq_puts(m, ",addr=");
575 seq_escape(m, nfss->hostname, " \t\n\\");
576 return 0;
577}
578
579/*
580 * Invalidate the local caches
581 */
582void
583nfs_zap_caches(struct inode *inode)
584{
585 struct nfs_inode *nfsi = NFS_I(inode);
586 int mode = inode->i_mode;
587
588 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
589 NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
590
591 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
592 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
593 nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
594 else
595 nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
596}
597
598/*
599 * Invalidate, but do not unhash, the inode
600 */
601static void
602nfs_invalidate_inode(struct inode *inode)
603{
604 umode_t save_mode = inode->i_mode;
605
606 make_bad_inode(inode);
607 inode->i_mode = save_mode;
608 nfs_zap_caches(inode);
609}
610
/*
 * Lookup key handed to iget5_locked() by nfs_fhget() and consumed by
 * nfs_find_actor() and nfs_init_locked().
 */
struct nfs_find_desc {
	struct nfs_fh		*fh;	/* file handle to match or copy */
	struct nfs_fattr	*fattr;	/* attributes; supplies the fileid */
};
615
616/*
617 * In NFSv3 we can have 64bit inode numbers. In order to support
618 * this, and re-exported directories (also seen in NFSv2)
619 * we are forced to allow 2 different inodes to have the same
620 * i_ino.
621 */
622static int
623nfs_find_actor(struct inode *inode, void *opaque)
624{
625 struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
626 struct nfs_fh *fh = desc->fh;
627 struct nfs_fattr *fattr = desc->fattr;
628
629 if (NFS_FILEID(inode) != fattr->fileid)
630 return 0;
631 if (nfs_compare_fh(NFS_FH(inode), fh))
632 return 0;
633 if (is_bad_inode(inode) || NFS_STALE(inode))
634 return 0;
635 return 1;
636}
637
638static int
639nfs_init_locked(struct inode *inode, void *opaque)
640{
641 struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
642 struct nfs_fattr *fattr = desc->fattr;
643
644 NFS_FILEID(inode) = fattr->fileid;
645 nfs_copy_fh(NFS_FH(inode), desc->fh);
646 return 0;
647}
648
649/* Don't use READDIRPLUS on directories that we believe are too large */
650#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
651
652/*
653 * This is our front-end to iget that looks up inodes by file handle
654 * instead of inode number.
655 */
656struct inode *
657nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
658{
659 struct nfs_find_desc desc = {
660 .fh = fh,
661 .fattr = fattr
662 };
663 struct inode *inode = NULL;
664 unsigned long hash;
665
666 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
667 goto out_no_inode;
668
669 if (!fattr->nlink) {
670 printk("NFS: Buggy server - nlink == 0!\n");
671 goto out_no_inode;
672 }
673
674 hash = nfs_fattr_to_ino_t(fattr);
675
676 if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
677 goto out_no_inode;
678
679 if (inode->i_state & I_NEW) {
680 struct nfs_inode *nfsi = NFS_I(inode);
681
682 /* We set i_ino for the few things that still rely on it,
683 * such as stat(2) */
684 inode->i_ino = hash;
685
686 /* We can't support update_atime(), since the server will reset it */
687 inode->i_flags |= S_NOATIME|S_NOCMTIME;
688 inode->i_mode = fattr->mode;
689 /* Why so? Because we want revalidate for devices/FIFOs, and
690 * that's precisely what we have in nfs_file_inode_operations.
691 */
692 inode->i_op = &nfs_file_inode_operations;
693 if (S_ISREG(inode->i_mode)) {
694 inode->i_fop = &nfs_file_operations;
695 inode->i_data.a_ops = &nfs_file_aops;
696 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
697 } else if (S_ISDIR(inode->i_mode)) {
698 inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
699 inode->i_fop = &nfs_dir_operations;
700 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
701 && fattr->size <= NFS_LIMIT_READDIRPLUS)
702 NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
703 } else if (S_ISLNK(inode->i_mode))
704 inode->i_op = &nfs_symlink_inode_operations;
705 else
706 init_special_inode(inode, inode->i_mode, fattr->rdev);
707
708 nfsi->read_cache_jiffies = fattr->timestamp;
709 inode->i_atime = fattr->atime;
710 inode->i_mtime = fattr->mtime;
711 inode->i_ctime = fattr->ctime;
712 if (fattr->valid & NFS_ATTR_FATTR_V4)
713 nfsi->change_attr = fattr->change_attr;
714 inode->i_size = nfs_size_to_loff_t(fattr->size);
715 inode->i_nlink = fattr->nlink;
716 inode->i_uid = fattr->uid;
717 inode->i_gid = fattr->gid;
718 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
719 /*
720 * report the blocks in 512byte units
721 */
722 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
723 inode->i_blksize = inode->i_sb->s_blocksize;
724 } else {
725 inode->i_blocks = fattr->du.nfs2.blocks;
726 inode->i_blksize = fattr->du.nfs2.blocksize;
727 }
728 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
729 nfsi->attrtimeo_timestamp = jiffies;
730 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
731 nfsi->cache_access.cred = NULL;
732
733 unlock_new_inode(inode);
734 } else
735 nfs_refresh_inode(inode, fattr);
736 dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
737 inode->i_sb->s_id,
738 (long long)NFS_FILEID(inode),
739 atomic_read(&inode->i_count));
740
741out:
742 return inode;
743
744out_no_inode:
745 printk("nfs_fhget: iget failed\n");
746 goto out;
747}
748
749#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET)
750
751int
752nfs_setattr(struct dentry *dentry, struct iattr *attr)
753{
754 struct inode *inode = dentry->d_inode;
755 struct nfs_fattr fattr;
756 int error;
757
758 if (attr->ia_valid & ATTR_SIZE) {
759 if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
760 attr->ia_valid &= ~ATTR_SIZE;
761 }
762
763 /* Optimization: if the end result is no change, don't RPC */
764 attr->ia_valid &= NFS_VALID_ATTRS;
765 if (attr->ia_valid == 0)
766 return 0;
767
768 lock_kernel();
769 nfs_begin_data_update(inode);
770 /* Write all dirty data if we're changing file permissions or size */
771 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
772 if (filemap_fdatawrite(inode->i_mapping) == 0)
773 filemap_fdatawait(inode->i_mapping);
774 nfs_wb_all(inode);
775 }
776 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
777 if (error == 0) {
778 nfs_refresh_inode(inode, &fattr);
779 if ((attr->ia_valid & ATTR_MODE) != 0) {
780 int mode;
781 mode = inode->i_mode & ~S_IALLUGO;
782 mode |= attr->ia_mode & S_IALLUGO;
783 inode->i_mode = mode;
784 }
785 if ((attr->ia_valid & ATTR_UID) != 0)
786 inode->i_uid = attr->ia_uid;
787 if ((attr->ia_valid & ATTR_GID) != 0)
788 inode->i_gid = attr->ia_gid;
789 if ((attr->ia_valid & ATTR_SIZE) != 0) {
790 inode->i_size = attr->ia_size;
791 vmtruncate(inode, attr->ia_size);
792 }
793 }
794 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
795 NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
796 nfs_end_data_update(inode);
797 unlock_kernel();
798 return error;
799}
800
801/*
802 * Wait for the inode to get unlocked.
803 * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
804 */
805static int
806nfs_wait_on_inode(struct inode *inode, int flag)
807{
808 struct rpc_clnt *clnt = NFS_CLIENT(inode);
809 struct nfs_inode *nfsi = NFS_I(inode);
810
811 int error;
812 if (!(NFS_FLAGS(inode) & flag))
813 return 0;
814 atomic_inc(&inode->i_count);
815 error = nfs_wait_event(clnt, nfsi->nfs_i_wait,
816 !(NFS_FLAGS(inode) & flag));
817 iput(inode);
818 return error;
819}
820
821int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
822{
823 struct inode *inode = dentry->d_inode;
824 struct nfs_inode *nfsi = NFS_I(inode);
825 int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
826 int err;
827
828 if (__IS_FLG(inode, MS_NOATIME))
829 need_atime = 0;
830 else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
831 need_atime = 0;
832 /* We may force a getattr if the user cares about atime */
833 if (need_atime)
834 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
835 else
836 err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
837 if (!err)
838 generic_fillattr(inode, stat);
839 return err;
840}
841
842struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
843{
844 struct nfs_open_context *ctx;
845
846 ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL);
847 if (ctx != NULL) {
848 atomic_set(&ctx->count, 1);
849 ctx->dentry = dget(dentry);
850 ctx->cred = get_rpccred(cred);
851 ctx->state = NULL;
852 ctx->lockowner = current->files;
853 ctx->error = 0;
854 init_waitqueue_head(&ctx->waitq);
855 }
856 return ctx;
857}
858
859struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
860{
861 if (ctx != NULL)
862 atomic_inc(&ctx->count);
863 return ctx;
864}
865
866void put_nfs_open_context(struct nfs_open_context *ctx)
867{
868 if (atomic_dec_and_test(&ctx->count)) {
869 if (!list_empty(&ctx->list)) {
870 struct inode *inode = ctx->dentry->d_inode;
871 spin_lock(&inode->i_lock);
872 list_del(&ctx->list);
873 spin_unlock(&inode->i_lock);
874 }
875 if (ctx->state != NULL)
876 nfs4_close_state(ctx->state, ctx->mode);
877 if (ctx->cred != NULL)
878 put_rpccred(ctx->cred);
879 dput(ctx->dentry);
880 kfree(ctx);
881 }
882}
883
884/*
885 * Ensure that mmap has a recent RPC credential for use when writing out
886 * shared pages
887 */
888void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
889{
890 struct inode *inode = filp->f_dentry->d_inode;
891 struct nfs_inode *nfsi = NFS_I(inode);
892
893 filp->private_data = get_nfs_open_context(ctx);
894 spin_lock(&inode->i_lock);
895 list_add(&ctx->list, &nfsi->open_files);
896 spin_unlock(&inode->i_lock);
897}
898
899struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode)
900{
901 struct nfs_inode *nfsi = NFS_I(inode);
902 struct nfs_open_context *pos, *ctx = NULL;
903
904 spin_lock(&inode->i_lock);
905 list_for_each_entry(pos, &nfsi->open_files, list) {
906 if ((pos->mode & mode) == mode) {
907 ctx = get_nfs_open_context(pos);
908 break;
909 }
910 }
911 spin_unlock(&inode->i_lock);
912 return ctx;
913}
914
915void nfs_file_clear_open_context(struct file *filp)
916{
917 struct inode *inode = filp->f_dentry->d_inode;
918 struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
919
920 if (ctx) {
921 filp->private_data = NULL;
922 spin_lock(&inode->i_lock);
923 list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
924 spin_unlock(&inode->i_lock);
925 put_nfs_open_context(ctx);
926 }
927}
928
929/*
930 * These allocate and release file read/write context information.
931 */
932int nfs_open(struct inode *inode, struct file *filp)
933{
934 struct nfs_open_context *ctx;
935 struct rpc_cred *cred;
936
937 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
938 if (IS_ERR(cred))
939 return PTR_ERR(cred);
940 ctx = alloc_nfs_open_context(filp->f_dentry, cred);
941 put_rpccred(cred);
942 if (ctx == NULL)
943 return -ENOMEM;
944 ctx->mode = filp->f_mode;
945 nfs_file_set_open_context(filp, ctx);
946 put_nfs_open_context(ctx);
947 if ((filp->f_mode & FMODE_WRITE) != 0)
948 nfs_begin_data_update(inode);
949 return 0;
950}
951
952int nfs_release(struct inode *inode, struct file *filp)
953{
954 if ((filp->f_mode & FMODE_WRITE) != 0)
955 nfs_end_data_update(inode);
956 nfs_file_clear_open_context(filp);
957 return 0;
958}
959
960/*
961 * This function is called whenever some part of NFS notices that
962 * the cached attributes have to be refreshed.
963 */
964int
965__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
966{
967 int status = -ESTALE;
968 struct nfs_fattr fattr;
969 struct nfs_inode *nfsi = NFS_I(inode);
970 unsigned long verifier;
971 unsigned int flags;
972
973 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
974 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
975
976 lock_kernel();
977 if (!inode || is_bad_inode(inode))
978 goto out_nowait;
979 if (NFS_STALE(inode))
980 goto out_nowait;
981
982 while (NFS_REVALIDATING(inode)) {
983 status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
984 if (status < 0)
985 goto out_nowait;
986 if (NFS_ATTRTIMEO(inode) == 0)
987 continue;
988 if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
989 continue;
990 status = NFS_STALE(inode) ? -ESTALE : 0;
991 goto out_nowait;
992 }
993 NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
994
995 /* Protect against RPC races by saving the change attribute */
996 verifier = nfs_save_change_attribute(inode);
997 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
998 if (status != 0) {
999 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
1000 inode->i_sb->s_id,
1001 (long long)NFS_FILEID(inode), status);
1002 if (status == -ESTALE) {
1003 nfs_zap_caches(inode);
1004 if (!S_ISDIR(inode->i_mode))
1005 NFS_FLAGS(inode) |= NFS_INO_STALE;
1006 }
1007 goto out;
1008 }
1009
1010 status = nfs_update_inode(inode, &fattr, verifier);
1011 if (status) {
1012 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
1013 inode->i_sb->s_id,
1014 (long long)NFS_FILEID(inode), status);
1015 goto out;
1016 }
1017 flags = nfsi->flags;
1018 /*
1019 * We may need to keep the attributes marked as invalid if
1020 * we raced with nfs_end_attr_update().
1021 */
1022 if (verifier == nfsi->cache_change_attribute)
1023 nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
1024 /* Do the page cache invalidation */
1025 if (flags & NFS_INO_INVALID_DATA) {
1026 if (S_ISREG(inode->i_mode)) {
1027 if (filemap_fdatawrite(inode->i_mapping) == 0)
1028 filemap_fdatawait(inode->i_mapping);
1029 nfs_wb_all(inode);
1030 }
1031 nfsi->flags &= ~NFS_INO_INVALID_DATA;
1032 invalidate_inode_pages2(inode->i_mapping);
1033 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
1034 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
1035 inode->i_sb->s_id,
1036 (long long)NFS_FILEID(inode));
1037 /* This ensures we revalidate dentries */
1038 nfsi->cache_change_attribute++;
1039 }
1040 dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
1041 inode->i_sb->s_id,
1042 (long long)NFS_FILEID(inode));
1043
1044out:
1045 NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
1046 wake_up(&nfsi->nfs_i_wait);
1047 out_nowait:
1048 unlock_kernel();
1049 return status;
1050}
1051
1052int nfs_attribute_timeout(struct inode *inode)
1053{
1054 struct nfs_inode *nfsi = NFS_I(inode);
1055
1056 if (nfs_have_delegation(inode, FMODE_READ))
1057 return 0;
1058 return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
1059}
1060
1061/**
1062 * nfs_revalidate_inode - Revalidate the inode attributes
1063 * @server - pointer to nfs_server struct
1064 * @inode - pointer to inode struct
1065 *
1066 * Updates inode attribute information by retrieving the data from the server.
1067 */
1068int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1069{
1070 if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
1071 && !nfs_attribute_timeout(inode))
1072 return NFS_STALE(inode) ? -ESTALE : 0;
1073 return __nfs_revalidate_inode(server, inode);
1074}
1075
1076/**
1077 * nfs_begin_data_update
1078 * @inode - pointer to inode
1079 * Declare that a set of operations will update file data on the server
1080 */
1081void nfs_begin_data_update(struct inode *inode)
1082{
1083 atomic_inc(&NFS_I(inode)->data_updates);
1084}
1085
1086/**
1087 * nfs_end_data_update
1088 * @inode - pointer to inode
1089 * Declare end of the operations that will update file data
1090 * This will mark the inode as immediately needing revalidation
1091 * of its attribute cache.
1092 */
1093void nfs_end_data_update(struct inode *inode)
1094{
1095 struct nfs_inode *nfsi = NFS_I(inode);
1096
1097 if (!nfs_have_delegation(inode, FMODE_READ)) {
1098 /* Mark the attribute cache for revalidation */
1099 nfsi->flags |= NFS_INO_INVALID_ATTR;
1100 /* Directories and symlinks: invalidate page cache too */
1101 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1102 nfsi->flags |= NFS_INO_INVALID_DATA;
1103 }
1104 nfsi->cache_change_attribute ++;
1105 atomic_dec(&nfsi->data_updates);
1106}
1107
1108/**
1109 * nfs_end_data_update_defer
1110 * @inode - pointer to inode
1111 * Declare end of the operations that will update file data
1112 * This will defer marking the inode as needing revalidation
1113 * unless there are no other pending updates.
1114 */
1115void nfs_end_data_update_defer(struct inode *inode)
1116{
1117 struct nfs_inode *nfsi = NFS_I(inode);
1118
1119 if (atomic_dec_and_test(&nfsi->data_updates)) {
1120 /* Mark the attribute cache for revalidation */
1121 nfsi->flags |= NFS_INO_INVALID_ATTR;
1122 /* Directories and symlinks: invalidate page cache too */
1123 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1124 nfsi->flags |= NFS_INO_INVALID_DATA;
1125 nfsi->cache_change_attribute ++;
1126 }
1127}
1128
1129/**
1130 * nfs_refresh_inode - verify consistency of the inode attribute cache
1131 * @inode - pointer to inode
1132 * @fattr - updated attributes
1133 *
1134 * Verifies the attribute cache. If we have just changed the attributes,
1135 * so that fattr carries weak cache consistency data, then it may
1136 * also update the ctime/mtime/change_attribute.
1137 */
1138int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1139{
1140 struct nfs_inode *nfsi = NFS_I(inode);
1141 loff_t cur_size, new_isize;
1142 int data_unstable;
1143
1144 /* Do we hold a delegation? */
1145 if (nfs_have_delegation(inode, FMODE_READ))
1146 return 0;
1147
1148 /* Are we in the process of updating data on the server? */
1149 data_unstable = nfs_caches_unstable(inode);
1150
1151 if (fattr->valid & NFS_ATTR_FATTR_V4) {
1152 if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
1153 && nfsi->change_attr == fattr->pre_change_attr)
1154 nfsi->change_attr = fattr->change_attr;
1155 if (!data_unstable && nfsi->change_attr != fattr->change_attr)
1156 nfsi->flags |= NFS_INO_INVALID_ATTR;
1157 }
1158
1159 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1160 return 0;
1161
1162 /* Has the inode gone and changed behind our back? */
1163 if (nfsi->fileid != fattr->fileid
1164 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1165 return -EIO;
1166
1167 cur_size = i_size_read(inode);
1168 new_isize = nfs_size_to_loff_t(fattr->size);
1169
1170 /* If we have atomic WCC data, we may update some attributes */
1171 if ((fattr->valid & NFS_ATTR_WCC) != 0) {
1172 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
1173 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1174 if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime))
1175 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1176 }
1177
1178 /* Verify a few of the more important attributes */
1179 if (!data_unstable) {
1180 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
1181 || cur_size != new_isize)
1182 nfsi->flags |= NFS_INO_INVALID_ATTR;
1183 } else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
1184 nfsi->flags |= NFS_INO_INVALID_ATTR;
1185
1186 /* Have any file permissions changed? */
1187 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
1188 || inode->i_uid != fattr->uid
1189 || inode->i_gid != fattr->gid)
1190 nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
1191
1192 /* Has the link count changed? */
1193 if (inode->i_nlink != fattr->nlink)
1194 nfsi->flags |= NFS_INO_INVALID_ATTR;
1195
1196 if (!timespec_equal(&inode->i_atime, &fattr->atime))
1197 nfsi->flags |= NFS_INO_INVALID_ATIME;
1198
1199 nfsi->read_cache_jiffies = fattr->timestamp;
1200 return 0;
1201}
1202
1203/*
1204 * Many nfs protocol calls return the new file attributes after
1205 * an operation. Here we update the inode to reflect the state
1206 * of the server's inode.
1207 *
1208 * This is a bit tricky because we have to make sure all dirty pages
1209 * have been sent off to the server before calling invalidate_inode_pages.
1210 * To make sure no other process adds more write requests while we try
1211 * our best to flush them, we make them sleep during the attribute refresh.
1212 *
1213 * A very similar scenario holds for the dir cache.
1214 */
1215static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
1216{
1217 struct nfs_inode *nfsi = NFS_I(inode);
1218 __u64 new_size;
1219 loff_t new_isize;
1220 unsigned int invalid = 0;
1221 loff_t cur_isize;
1222 int data_unstable;
1223
1224 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
1225 __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
1226 atomic_read(&inode->i_count), fattr->valid);
1227
1228 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1229 return 0;
1230
1231 if (nfsi->fileid != fattr->fileid) {
1232 printk(KERN_ERR "%s: inode number mismatch\n"
1233 "expected (%s/0x%Lx), got (%s/0x%Lx)\n",
1234 __FUNCTION__,
1235 inode->i_sb->s_id, (long long)nfsi->fileid,
1236 inode->i_sb->s_id, (long long)fattr->fileid);
1237 goto out_err;
1238 }
1239
1240 /*
1241 * Make sure the inode's type hasn't changed.
1242 */
1243 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1244 goto out_changed;
1245
1246 /*
1247 * Update the read time so we don't revalidate too often.
1248 */
1249 nfsi->read_cache_jiffies = fattr->timestamp;
1250
1251 /* Are we racing with known updates of the metadata on the server? */
1252 data_unstable = ! nfs_verify_change_attribute(inode, verifier);
1253
1254 /* Check if the file size agrees */
1255 new_size = fattr->size;
1256 new_isize = nfs_size_to_loff_t(fattr->size);
1257 cur_isize = i_size_read(inode);
1258 if (cur_isize != new_size) {
1259#ifdef NFS_DEBUG_VERBOSE
1260 printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
1261#endif
1262 /*
1263 * If we have pending writebacks, things can get
1264 * messy.
1265 */
1266 if (S_ISREG(inode->i_mode) && data_unstable) {
1267 if (new_isize > cur_isize) {
1268 inode->i_size = new_isize;
1269 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1270 }
1271 } else {
1272 inode->i_size = new_isize;
1273 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1274 }
1275 }
1276
1277 /*
1278 * Note: we don't check inode->i_mtime since pipes etc.
1279 * can change this value in VFS without requiring a
1280 * cache revalidation.
1281 */
1282 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1283 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1284#ifdef NFS_DEBUG_VERBOSE
1285 printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
1286#endif
1287 if (!data_unstable)
1288 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1289 }
1290
1291 if ((fattr->valid & NFS_ATTR_FATTR_V4)
1292 && nfsi->change_attr != fattr->change_attr) {
1293#ifdef NFS_DEBUG_VERBOSE
1294 printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
1295 inode->i_sb->s_id, inode->i_ino);
1296#endif
1297 nfsi->change_attr = fattr->change_attr;
1298 if (!data_unstable)
1299 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
1300 }
1301
1302 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1303 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1304
1305 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
1306 inode->i_uid != fattr->uid ||
1307 inode->i_gid != fattr->gid)
1308 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
1309
1310 inode->i_mode = fattr->mode;
1311 inode->i_nlink = fattr->nlink;
1312 inode->i_uid = fattr->uid;
1313 inode->i_gid = fattr->gid;
1314
1315 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
1316 /*
1317 * report the blocks in 512byte units
1318 */
1319 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
1320 inode->i_blksize = inode->i_sb->s_blocksize;
1321 } else {
1322 inode->i_blocks = fattr->du.nfs2.blocks;
1323 inode->i_blksize = fattr->du.nfs2.blocksize;
1324 }
1325
1326 /* Update attrtimeo value if we're out of the unstable period */
1327 if (invalid & NFS_INO_INVALID_ATTR) {
1328 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1329 nfsi->attrtimeo_timestamp = jiffies;
1330 } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
1331 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1332 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1333 nfsi->attrtimeo_timestamp = jiffies;
1334 }
1335 /* Don't invalidate the data if we were to blame */
1336 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1337 || S_ISLNK(inode->i_mode)))
1338 invalid &= ~NFS_INO_INVALID_DATA;
1339 if (!nfs_have_delegation(inode, FMODE_READ))
1340 nfsi->flags |= invalid;
1341
1342 return 0;
1343 out_changed:
1344 /*
1345 * Big trouble! The inode has become a different object.
1346 */
1347#ifdef NFS_PARANOIA
1348 printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
1349 __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode);
1350#endif
1351 /*
1352 * No need to worry about unhashing the dentry, as the
1353 * lookup validation will know that the inode is bad.
1354 * (But we fall through to invalidate the caches.)
1355 */
1356 nfs_invalidate_inode(inode);
1357 out_err:
1358 NFS_FLAGS(inode) |= NFS_INO_STALE;
1359 return -ESTALE;
1360}
1361
1362/*
1363 * File system information
1364 */
1365
1366static int nfs_set_super(struct super_block *s, void *data)
1367{
1368 s->s_fs_info = data;
1369 return set_anon_super(s, data);
1370}
1371
1372static int nfs_compare_super(struct super_block *sb, void *data)
1373{
1374 struct nfs_server *server = data;
1375 struct nfs_server *old = NFS_SB(sb);
1376
1377 if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
1378 return 0;
1379 if (old->addr.sin_port != server->addr.sin_port)
1380 return 0;
1381 return !nfs_compare_fh(&old->fh, &server->fh);
1382}
1383
1384static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
1385 int flags, const char *dev_name, void *raw_data)
1386{
1387 int error;
1388 struct nfs_server *server;
1389 struct super_block *s;
1390 struct nfs_fh *root;
1391 struct nfs_mount_data *data = raw_data;
1392
1393 if (!data) {
1394 printk("nfs_read_super: missing data argument\n");
1395 return ERR_PTR(-EINVAL);
1396 }
1397
1398 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
1399 if (!server)
1400 return ERR_PTR(-ENOMEM);
1401 memset(server, 0, sizeof(struct nfs_server));
1402 /* Zero out the NFS state stuff */
1403 init_nfsv4_state(server);
1404
1405 if (data->version != NFS_MOUNT_VERSION) {
1406 printk("nfs warning: mount version %s than kernel\n",
1407 data->version < NFS_MOUNT_VERSION ? "older" : "newer");
1408 if (data->version < 2)
1409 data->namlen = 0;
1410 if (data->version < 3)
1411 data->bsize = 0;
1412 if (data->version < 4) {
1413 data->flags &= ~NFS_MOUNT_VER3;
1414 data->root.size = NFS2_FHSIZE;
1415 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
1416 }
1417 if (data->version < 5)
1418 data->flags &= ~NFS_MOUNT_SECFLAVOUR;
1419 }
1420
1421 root = &server->fh;
1422 if (data->flags & NFS_MOUNT_VER3)
1423 root->size = data->root.size;
1424 else
1425 root->size = NFS2_FHSIZE;
1426 if (root->size > sizeof(root->data)) {
1427 printk("nfs_get_sb: invalid root filehandle\n");
1428 kfree(server);
1429 return ERR_PTR(-EINVAL);
1430 }
1431 memcpy(root->data, data->root.data, root->size);
1432
1433 /* We now require that the mount process passes the remote address */
1434 memcpy(&server->addr, &data->addr, sizeof(server->addr));
1435 if (server->addr.sin_addr.s_addr == INADDR_ANY) {
1436 printk("NFS: mount program didn't pass remote address!\n");
1437 kfree(server);
1438 return ERR_PTR(-EINVAL);
1439 }
1440
1441 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1442
1443 if (IS_ERR(s) || s->s_root) {
1444 kfree(server);
1445 return s;
1446 }
1447
1448 s->s_flags = flags;
1449
1450 /* Fire up rpciod if not yet running */
1451 if (rpciod_up() != 0) {
1452 printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
1453 kfree(server);
1454 return ERR_PTR(-EIO);
1455 }
1456
1457 error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
1458 if (error) {
1459 up_write(&s->s_umount);
1460 deactivate_super(s);
1461 return ERR_PTR(error);
1462 }
1463 s->s_flags |= MS_ACTIVE;
1464 return s;
1465}
1466
1467static void nfs_kill_super(struct super_block *s)
1468{
1469 struct nfs_server *server = NFS_SB(s);
1470
1471 kill_anon_super(s);
1472
1473 if (server->client != NULL && !IS_ERR(server->client))
1474 rpc_shutdown_client(server->client);
1475 if (server->client_sys != NULL && !IS_ERR(server->client_sys))
1476 rpc_shutdown_client(server->client_sys);
1477
1478 if (!(server->flags & NFS_MOUNT_NONLM))
1479 lockd_down(); /* release rpc.lockd */
1480
1481 rpciod_down(); /* release rpciod */
1482
1483 if (server->hostname != NULL)
1484 kfree(server->hostname);
1485 kfree(server);
1486}
1487
1488static struct file_system_type nfs_fs_type = {
1489 .owner = THIS_MODULE,
1490 .name = "nfs",
1491 .get_sb = nfs_get_sb,
1492 .kill_sb = nfs_kill_super,
1493 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
1494};
1495
1496#ifdef CONFIG_NFS_V4
1497
1498static void nfs4_clear_inode(struct inode *);
1499
1500
1501static struct super_operations nfs4_sops = {
1502 .alloc_inode = nfs_alloc_inode,
1503 .destroy_inode = nfs_destroy_inode,
1504 .write_inode = nfs_write_inode,
1505 .delete_inode = nfs_delete_inode,
1506 .statfs = nfs_statfs,
1507 .clear_inode = nfs4_clear_inode,
1508 .umount_begin = nfs_umount_begin,
1509 .show_options = nfs_show_options,
1510};
1511
1512/*
1513 * Clean out any remaining NFSv4 state that might be left over due
1514 * to open() calls that passed nfs_atomic_lookup, but failed to call
1515 * nfs_open().
1516 */
1517static void nfs4_clear_inode(struct inode *inode)
1518{
1519 struct nfs_inode *nfsi = NFS_I(inode);
1520
1521 /* If we are holding a delegation, return it! */
1522 if (nfsi->delegation != NULL)
1523 nfs_inode_return_delegation(inode);
1524 /* First call standard NFS clear_inode() code */
1525 nfs_clear_inode(inode);
1526 /* Now clear out any remaining state */
1527 while (!list_empty(&nfsi->open_states)) {
1528 struct nfs4_state *state;
1529
1530 state = list_entry(nfsi->open_states.next,
1531 struct nfs4_state,
1532 inode_states);
1533 dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
1534 __FUNCTION__,
1535 inode->i_sb->s_id,
1536 (long long)NFS_FILEID(inode),
1537 state);
1538 BUG_ON(atomic_read(&state->count) != 1);
1539 nfs4_close_state(state, state->state);
1540 }
1541}
1542
1543
1544static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
1545{
1546 struct nfs_server *server;
1547 struct nfs4_client *clp = NULL;
1548 struct rpc_xprt *xprt = NULL;
1549 struct rpc_clnt *clnt = NULL;
1550 struct rpc_timeout timeparms;
1551 rpc_authflavor_t authflavour;
1552 int proto, err = -EIO;
1553
1554 sb->s_blocksize_bits = 0;
1555 sb->s_blocksize = 0;
1556 server = NFS_SB(sb);
1557 if (data->rsize != 0)
1558 server->rsize = nfs_block_size(data->rsize, NULL);
1559 if (data->wsize != 0)
1560 server->wsize = nfs_block_size(data->wsize, NULL);
1561 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
1562 server->caps = NFS_CAP_ATOMIC_OPEN;
1563
1564 server->acregmin = data->acregmin*HZ;
1565 server->acregmax = data->acregmax*HZ;
1566 server->acdirmin = data->acdirmin*HZ;
1567 server->acdirmax = data->acdirmax*HZ;
1568
1569 server->rpc_ops = &nfs_v4_clientops;
1570 /* Initialize timeout values */
1571
1572 timeparms.to_initval = data->timeo * HZ / 10;
1573 timeparms.to_retries = data->retrans;
1574 timeparms.to_exponential = 1;
1575 if (!timeparms.to_retries)
1576 timeparms.to_retries = 5;
1577
1578 proto = data->proto;
1579 /* Which IP protocol do we use? */
1580 switch (proto) {
1581 case IPPROTO_TCP:
1582 timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT;
1583 if (!timeparms.to_initval)
1584 timeparms.to_initval = 600 * HZ / 10;
1585 break;
1586 case IPPROTO_UDP:
1587 timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT;
1588 if (!timeparms.to_initval)
1589 timeparms.to_initval = 11 * HZ / 10;
1590 break;
1591 default:
1592 return -EINVAL;
1593 }
1594
1595 clp = nfs4_get_client(&server->addr.sin_addr);
1596 if (!clp) {
1597 printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
1598 return -EIO;
1599 }
1600
1601 /* Now create transport and client */
1602 authflavour = RPC_AUTH_UNIX;
1603 if (data->auth_flavourlen != 0) {
1604 if (data->auth_flavourlen > 1)
1605 printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
1606 if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
1607 err = -EFAULT;
1608 goto out_fail;
1609 }
1610 }
1611
1612 down_write(&clp->cl_sem);
1613 if (clp->cl_rpcclient == NULL) {
1614 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
1615 if (IS_ERR(xprt)) {
1616 up_write(&clp->cl_sem);
1617 printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
1618 err = PTR_ERR(xprt);
1619 goto out_fail;
1620 }
1621 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
1622 server->rpc_ops->version, authflavour);
1623 if (IS_ERR(clnt)) {
1624 up_write(&clp->cl_sem);
1625 printk(KERN_WARNING "NFS: cannot create RPC client.\n");
1626 xprt_destroy(xprt);
1627 err = PTR_ERR(clnt);
1628 goto out_fail;
1629 }
1630 clnt->cl_intr = 1;
1631 clnt->cl_softrtry = 1;
1632 clnt->cl_chatty = 1;
1633 clp->cl_rpcclient = clnt;
1634 clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0);
1635 if (IS_ERR(clp->cl_cred)) {
1636 up_write(&clp->cl_sem);
1637 err = PTR_ERR(clp->cl_cred);
1638 clp->cl_cred = NULL;
1639 goto out_fail;
1640 }
1641 memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
1642 nfs_idmap_new(clp);
1643 }
1644 if (list_empty(&clp->cl_superblocks)) {
1645 err = nfs4_init_client(clp);
1646 if (err != 0) {
1647 up_write(&clp->cl_sem);
1648 goto out_fail;
1649 }
1650 }
1651 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1652 clnt = rpc_clone_client(clp->cl_rpcclient);
1653 if (!IS_ERR(clnt))
1654 server->nfs4_state = clp;
1655 up_write(&clp->cl_sem);
1656 clp = NULL;
1657
1658 if (IS_ERR(clnt)) {
1659 printk(KERN_WARNING "NFS: cannot create RPC client.\n");
1660 return PTR_ERR(clnt);
1661 }
1662
1663 server->client = clnt;
1664
1665 if (server->nfs4_state->cl_idmap == NULL) {
1666 printk(KERN_WARNING "NFS: failed to create idmapper.\n");
1667 return -ENOMEM;
1668 }
1669
1670 if (clnt->cl_auth->au_flavor != authflavour) {
1671 if (rpcauth_create(authflavour, clnt) == NULL) {
1672 printk(KERN_WARNING "NFS: couldn't create credcache!\n");
1673 return -ENOMEM;
1674 }
1675 }
1676
1677 sb->s_time_gran = 1;
1678
1679 sb->s_op = &nfs4_sops;
1680 err = nfs_sb_init(sb, authflavour);
1681 if (err == 0)
1682 return 0;
1683out_fail:
1684 if (clp)
1685 nfs4_put_client(clp);
1686 return err;
1687}
1688
1689static int nfs4_compare_super(struct super_block *sb, void *data)
1690{
1691 struct nfs_server *server = data;
1692 struct nfs_server *old = NFS_SB(sb);
1693
1694 if (strcmp(server->hostname, old->hostname) != 0)
1695 return 0;
1696 if (strcmp(server->mnt_path, old->mnt_path) != 0)
1697 return 0;
1698 return 1;
1699}
1700
1701static void *
1702nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1703{
1704 void *p = NULL;
1705
1706 if (!src->len)
1707 return ERR_PTR(-EINVAL);
1708 if (src->len < maxlen)
1709 maxlen = src->len;
1710 if (dst == NULL) {
1711 p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
1712 if (p == NULL)
1713 return ERR_PTR(-ENOMEM);
1714 }
1715 if (copy_from_user(dst, src->data, maxlen)) {
1716 if (p != NULL)
1717 kfree(p);
1718 return ERR_PTR(-EFAULT);
1719 }
1720 dst[maxlen] = '\0';
1721 return dst;
1722}
1723
1724static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
1725 int flags, const char *dev_name, void *raw_data)
1726{
1727 int error;
1728 struct nfs_server *server;
1729 struct super_block *s;
1730 struct nfs4_mount_data *data = raw_data;
1731 void *p;
1732
1733 if (!data) {
1734 printk("nfs_read_super: missing data argument\n");
1735 return ERR_PTR(-EINVAL);
1736 }
1737
1738 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
1739 if (!server)
1740 return ERR_PTR(-ENOMEM);
1741 memset(server, 0, sizeof(struct nfs_server));
1742 /* Zero out the NFS state stuff */
1743 init_nfsv4_state(server);
1744
1745 if (data->version != NFS4_MOUNT_VERSION) {
1746 printk("nfs warning: mount version %s than kernel\n",
1747 data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
1748 }
1749
1750 p = nfs_copy_user_string(NULL, &data->hostname, 256);
1751 if (IS_ERR(p))
1752 goto out_err;
1753 server->hostname = p;
1754
1755 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
1756 if (IS_ERR(p))
1757 goto out_err;
1758 server->mnt_path = p;
1759
1760 p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
1761 sizeof(server->ip_addr) - 1);
1762 if (IS_ERR(p))
1763 goto out_err;
1764
1765 /* We now require that the mount process passes the remote address */
1766 if (data->host_addrlen != sizeof(server->addr)) {
1767 s = ERR_PTR(-EINVAL);
1768 goto out_free;
1769 }
1770 if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
1771 s = ERR_PTR(-EFAULT);
1772 goto out_free;
1773 }
1774 if (server->addr.sin_family != AF_INET ||
1775 server->addr.sin_addr.s_addr == INADDR_ANY) {
1776 printk("NFS: mount program didn't pass remote IP address!\n");
1777 s = ERR_PTR(-EINVAL);
1778 goto out_free;
1779 }
1780
1781 s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
1782
1783 if (IS_ERR(s) || s->s_root)
1784 goto out_free;
1785
1786 s->s_flags = flags;
1787
1788 /* Fire up rpciod if not yet running */
1789 if (rpciod_up() != 0) {
1790 printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
1791 s = ERR_PTR(-EIO);
1792 goto out_free;
1793 }
1794
1795 error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
1796 if (error) {
1797 up_write(&s->s_umount);
1798 deactivate_super(s);
1799 return ERR_PTR(error);
1800 }
1801 s->s_flags |= MS_ACTIVE;
1802 return s;
1803out_err:
1804 s = (struct super_block *)p;
1805out_free:
1806 if (server->mnt_path)
1807 kfree(server->mnt_path);
1808 if (server->hostname)
1809 kfree(server->hostname);
1810 kfree(server);
1811 return s;
1812}
1813
1814static void nfs4_kill_super(struct super_block *sb)
1815{
1816 struct nfs_server *server = NFS_SB(sb);
1817
1818 nfs_return_all_delegations(sb);
1819 kill_anon_super(sb);
1820
1821 nfs4_renewd_prepare_shutdown(server);
1822
1823 if (server->client != NULL && !IS_ERR(server->client))
1824 rpc_shutdown_client(server->client);
1825 rpciod_down(); /* release rpciod */
1826
1827 destroy_nfsv4_state(server);
1828
1829 if (server->hostname != NULL)
1830 kfree(server->hostname);
1831 kfree(server);
1832}
1833
1834static struct file_system_type nfs4_fs_type = {
1835 .owner = THIS_MODULE,
1836 .name = "nfs4",
1837 .get_sb = nfs4_get_sb,
1838 .kill_sb = nfs4_kill_super,
1839 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
1840};
1841
/*
 * One-time initialisation of the NFSv4 members of a struct nfs_inode:
 * open-state list, delegation pointer and state, and the rwsem.
 * The argument is parenthesised at every use so that any pointer
 * expression can be passed safely.
 */
#define nfs4_init_once(nfsi) \
	do { \
		INIT_LIST_HEAD(&(nfsi)->open_states); \
		(nfsi)->delegation = NULL; \
		(nfsi)->delegation_state = 0; \
		init_rwsem(&(nfsi)->rwsem); \
	} while(0)
/* Register / unregister the "nfs4" filesystem type */
#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
1851#else
/*
 * Stubs for builds without CONFIG_NFS_V4: no per-inode NFSv4 state to set
 * up, registration reports success (0), unregistration expands to nothing.
 */
#define nfs4_init_once(nfsi)	do { } while (0)
#define register_nfs4fs()	(0)
#define unregister_nfs4fs()
1856#endif
1857
1858extern int nfs_init_nfspagecache(void);
1859extern void nfs_destroy_nfspagecache(void);
1860extern int nfs_init_readpagecache(void);
1861extern void nfs_destroy_readpagecache(void);
1862extern int nfs_init_writepagecache(void);
1863extern void nfs_destroy_writepagecache(void);
1864#ifdef CONFIG_NFS_DIRECTIO
1865extern int nfs_init_directcache(void);
1866extern void nfs_destroy_directcache(void);
1867#endif
1868
1869static kmem_cache_t * nfs_inode_cachep;
1870
1871static struct inode *nfs_alloc_inode(struct super_block *sb)
1872{
1873 struct nfs_inode *nfsi;
1874 nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
1875 if (!nfsi)
1876 return NULL;
1877 nfsi->flags = 0;
1878 return &nfsi->vfs_inode;
1879}
1880
1881static void nfs_destroy_inode(struct inode *inode)
1882{
1883 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1884}
1885
1886static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
1887{
1888 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1889
1890 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1891 SLAB_CTOR_CONSTRUCTOR) {
1892 inode_init_once(&nfsi->vfs_inode);
1893 spin_lock_init(&nfsi->req_lock);
1894 INIT_LIST_HEAD(&nfsi->dirty);
1895 INIT_LIST_HEAD(&nfsi->commit);
1896 INIT_LIST_HEAD(&nfsi->open_files);
1897 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1898 atomic_set(&nfsi->data_updates, 0);
1899 nfsi->ndirty = 0;
1900 nfsi->ncommit = 0;
1901 nfsi->npages = 0;
1902 init_waitqueue_head(&nfsi->nfs_i_wait);
1903 nfs4_init_once(nfsi);
1904 }
1905}
1906
1907int nfs_init_inodecache(void)
1908{
1909 nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
1910 sizeof(struct nfs_inode),
1911 0, SLAB_RECLAIM_ACCOUNT,
1912 init_once, NULL);
1913 if (nfs_inode_cachep == NULL)
1914 return -ENOMEM;
1915
1916 return 0;
1917}
1918
1919void nfs_destroy_inodecache(void)
1920{
1921 if (kmem_cache_destroy(nfs_inode_cachep))
1922 printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
1923}
1924
1925/*
1926 * Initialize NFS
1927 */
1928static int __init init_nfs_fs(void)
1929{
1930 int err;
1931
1932 err = nfs_init_nfspagecache();
1933 if (err)
1934 goto out4;
1935
1936 err = nfs_init_inodecache();
1937 if (err)
1938 goto out3;
1939
1940 err = nfs_init_readpagecache();
1941 if (err)
1942 goto out2;
1943
1944 err = nfs_init_writepagecache();
1945 if (err)
1946 goto out1;
1947
1948#ifdef CONFIG_NFS_DIRECTIO
1949 err = nfs_init_directcache();
1950 if (err)
1951 goto out0;
1952#endif
1953
1954#ifdef CONFIG_PROC_FS
1955 rpc_proc_register(&nfs_rpcstat);
1956#endif
1957 err = register_filesystem(&nfs_fs_type);
1958 if (err)
1959 goto out;
1960 if ((err = register_nfs4fs()) != 0)
1961 goto out;
1962 return 0;
1963out:
1964#ifdef CONFIG_PROC_FS
1965 rpc_proc_unregister("nfs");
1966#endif
1967 nfs_destroy_writepagecache();
1968#ifdef CONFIG_NFS_DIRECTIO
1969out0:
1970 nfs_destroy_directcache();
1971#endif
1972out1:
1973 nfs_destroy_readpagecache();
1974out2:
1975 nfs_destroy_inodecache();
1976out3:
1977 nfs_destroy_nfspagecache();
1978out4:
1979 return err;
1980}
1981
1982static void __exit exit_nfs_fs(void)
1983{
1984#ifdef CONFIG_NFS_DIRECTIO
1985 nfs_destroy_directcache();
1986#endif
1987 nfs_destroy_writepagecache();
1988 nfs_destroy_readpagecache();
1989 nfs_destroy_inodecache();
1990 nfs_destroy_nfspagecache();
1991#ifdef CONFIG_PROC_FS
1992 rpc_proc_unregister("nfs");
1993#endif
1994 unregister_filesystem(&nfs_fs_type);
1995 unregister_nfs4fs();
1996}
1997
1998/* Not quite true; I just maintain it */
1999MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
2000MODULE_LICENSE("GPL");
2001
2002module_init(init_nfs_fs)
2003module_exit(exit_nfs_fs)
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
new file mode 100644
index 000000000000..9d3ddad96d9e
--- /dev/null
+++ b/fs/nfs/mount_clnt.c
@@ -0,0 +1,183 @@
1/*
2 * linux/fs/nfs/mount_clnt.c
3 *
4 * MOUNT client to support NFSroot.
5 *
6 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/socket.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/uio.h>
14#include <linux/net.h>
15#include <linux/in.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/sched.h>
19#include <linux/nfs_fs.h>
20
21#ifdef RPC_DEBUG
22# define NFSDBG_FACILITY NFSDBG_ROOT
23#endif
24
25/*
26#define MOUNT_PROGRAM 100005
27#define MOUNT_VERSION 1
28#define MOUNT_MNT 1
29#define MOUNT_UMNT 3
30 */
31
32static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
33 int, int);
34static struct rpc_program mnt_program;
35
/* Result of a MOUNT MNT call, filled in by the xdr_decode_fhstatus*() routines */
struct mnt_fhstatus {
	unsigned int	status;	/* status word from the reply; 0 means success */
	struct nfs_fh	*fh;	/* caller's filehandle, written on success */
};
40
41/*
42 * Obtain an NFS file handle for the given host and path
43 */
44int
45nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
46 int version, int protocol)
47{
48 struct rpc_clnt *mnt_clnt;
49 struct mnt_fhstatus result = {
50 .fh = fh
51 };
52 char hostname[32];
53 int status;
54 int call;
55
56 dprintk("NFS: nfs_mount(%08x:%s)\n",
57 (unsigned)ntohl(addr->sin_addr.s_addr), path);
58
59 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
60 mnt_clnt = mnt_create(hostname, addr, version, protocol);
61 if (IS_ERR(mnt_clnt))
62 return PTR_ERR(mnt_clnt);
63
64 call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
65 status = rpc_call(mnt_clnt, call, path, &result, 0);
66 return status < 0? status : (result.status? -EACCES : 0);
67}
68
69static struct rpc_clnt *
70mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
71 int protocol)
72{
73 struct rpc_xprt *xprt;
74 struct rpc_clnt *clnt;
75
76 xprt = xprt_create_proto(protocol, srvaddr, NULL);
77 if (IS_ERR(xprt))
78 return (struct rpc_clnt *)xprt;
79
80 clnt = rpc_create_client(xprt, hostname,
81 &mnt_program, version,
82 RPC_AUTH_UNIX);
83 if (IS_ERR(clnt)) {
84 xprt_destroy(xprt);
85 } else {
86 clnt->cl_softrtry = 1;
87 clnt->cl_chatty = 1;
88 clnt->cl_oneshot = 1;
89 clnt->cl_intr = 1;
90 }
91 return clnt;
92}
93
94/*
95 * XDR encode/decode functions for MOUNT
96 */
97static int
98xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
99{
100 p = xdr_encode_string(p, path);
101
102 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
103 return 0;
104}
105
106static int
107xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
108{
109 struct nfs_fh *fh = res->fh;
110
111 if ((res->status = ntohl(*p++)) == 0) {
112 fh->size = NFS2_FHSIZE;
113 memcpy(fh->data, p, NFS2_FHSIZE);
114 }
115 return 0;
116}
117
118static int
119xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
120{
121 struct nfs_fh *fh = res->fh;
122
123 if ((res->status = ntohl(*p++)) == 0) {
124 int size = ntohl(*p++);
125 if (size <= NFS3_FHSIZE) {
126 fh->size = size;
127 memcpy(fh->data, p, size);
128 } else
129 res->status = -EBADHANDLE;
130 }
131 return 0;
132}
133
134#define MNT_dirpath_sz (1 + 256)
135#define MNT_fhstatus_sz (1 + 8)
136
137static struct rpc_procinfo mnt_procedures[] = {
138[MNTPROC_MNT] = {
139 .p_proc = MNTPROC_MNT,
140 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
141 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
142 .p_bufsiz = MNT_dirpath_sz << 2,
143 },
144};
145
146static struct rpc_procinfo mnt3_procedures[] = {
147[MOUNTPROC3_MNT] = {
148 .p_proc = MOUNTPROC3_MNT,
149 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
150 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
151 .p_bufsiz = MNT_dirpath_sz << 2,
152 },
153};
154
155
156static struct rpc_version mnt_version1 = {
157 .number = 1,
158 .nrprocs = 2,
159 .procs = mnt_procedures
160};
161
162static struct rpc_version mnt_version3 = {
163 .number = 3,
164 .nrprocs = 2,
165 .procs = mnt3_procedures
166};
167
168static struct rpc_version * mnt_version[] = {
169 NULL,
170 &mnt_version1,
171 NULL,
172 &mnt_version3,
173};
174
175static struct rpc_stat mnt_stats;
176
177static struct rpc_program mnt_program = {
178 .name = "mount",
179 .number = NFS_MNT_PROGRAM,
180 .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]),
181 .version = mnt_version,
182 .stats = &mnt_stats,
183};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
new file mode 100644
index 000000000000..d91b69044a4d
--- /dev/null
+++ b/fs/nfs/nfs2xdr.c
@@ -0,0 +1,711 @@
1/*
2 * linux/fs/nfs/nfs2xdr.c
3 *
4 * XDR functions to encode/decode NFS RPC arguments and results.
5 *
6 * Copyright (C) 1992, 1993, 1994 Rick Sladkey
7 * Copyright (C) 1996 Olaf Kirch
8 * 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu>
9 * FIFO's need special handling in NFSv2
10 */
11
12#include <linux/param.h>
13#include <linux/time.h>
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/utsname.h>
17#include <linux/errno.h>
18#include <linux/string.h>
19#include <linux/in.h>
20#include <linux/pagemap.h>
21#include <linux/proc_fs.h>
22#include <linux/sunrpc/clnt.h>
23#include <linux/nfs.h>
24#include <linux/nfs2.h>
25#include <linux/nfs_fs.h>
26
27#define NFSDBG_FACILITY NFSDBG_XDR
28/* #define NFS_PARANOIA 1 */
29
30extern int nfs_stat_to_errno(int stat);
31
32/* Mapping from NFS error code to "errno" error code. */
33#define errno_NFSERR_IO EIO
34
35/*
36 * Declare the space requirements for NFS arguments and replies as
37 * number of 32bit-words
38 */
39#define NFS_fhandle_sz (8)
40#define NFS_sattr_sz (8)
41#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2))
42#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2))
43#define NFS_fattr_sz (17)
44#define NFS_info_sz (5)
45#define NFS_entry_sz (NFS_filename_sz+3)
46
47#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz)
48#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz)
49#define NFS_readlinkargs_sz (NFS_fhandle_sz)
50#define NFS_readargs_sz (NFS_fhandle_sz+3)
51#define NFS_writeargs_sz (NFS_fhandle_sz+4)
52#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
53#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
54#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
55#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
56#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
57
58#define NFS_attrstat_sz (1+NFS_fattr_sz)
59#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
60#define NFS_readlinkres_sz (2)
61#define NFS_readres_sz (1+NFS_fattr_sz+1)
62#define NFS_writeres_sz (NFS_attrstat_sz)
63#define NFS_stat_sz (1)
64#define NFS_readdirres_sz (1)
65#define NFS_statfsres_sz (1+NFS_info_sz)
66
67/*
68 * Common NFS XDR functions as inlines
69 */
70static inline u32 *
71xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
72{
73 memcpy(p, fhandle->data, NFS2_FHSIZE);
74 return p + XDR_QUADLEN(NFS2_FHSIZE);
75}
76
77static inline u32 *
78xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
79{
80 /* NFSv2 handles have a fixed length */
81 fhandle->size = NFS2_FHSIZE;
82 memcpy(fhandle->data, p, NFS2_FHSIZE);
83 return p + XDR_QUADLEN(NFS2_FHSIZE);
84}
85
86static inline u32*
87xdr_encode_time(u32 *p, struct timespec *timep)
88{
89 *p++ = htonl(timep->tv_sec);
90 /* Convert nanoseconds into microseconds */
91 *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
92 return p;
93}
94
95static inline u32*
96xdr_encode_current_server_time(u32 *p, struct timespec *timep)
97{
98 /*
99 * Passing the invalid value useconds=1000000 is a
100 * Sun convention for "set to current server time".
101 * It's needed to make permissions checks for the
102 * "touch" program across v2 mounts to Solaris and
103 * Irix boxes work correctly. See description of
104 * sattr in section 6.1 of "NFS Illustrated" by
105 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
106 */
107 *p++ = htonl(timep->tv_sec);
108 *p++ = htonl(1000000);
109 return p;
110}
111
112static inline u32*
113xdr_decode_time(u32 *p, struct timespec *timep)
114{
115 timep->tv_sec = ntohl(*p++);
116 /* Convert microseconds into nanoseconds */
117 timep->tv_nsec = ntohl(*p++) * 1000;
118 return p;
119}
120
121static u32 *
122xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
123{
124 u32 rdev;
125 fattr->type = (enum nfs_ftype) ntohl(*p++);
126 fattr->mode = ntohl(*p++);
127 fattr->nlink = ntohl(*p++);
128 fattr->uid = ntohl(*p++);
129 fattr->gid = ntohl(*p++);
130 fattr->size = ntohl(*p++);
131 fattr->du.nfs2.blocksize = ntohl(*p++);
132 rdev = ntohl(*p++);
133 fattr->du.nfs2.blocks = ntohl(*p++);
134 fattr->fsid_u.nfs3 = ntohl(*p++);
135 fattr->fileid = ntohl(*p++);
136 p = xdr_decode_time(p, &fattr->atime);
137 p = xdr_decode_time(p, &fattr->mtime);
138 p = xdr_decode_time(p, &fattr->ctime);
139 fattr->valid |= NFS_ATTR_FATTR;
140 fattr->rdev = new_decode_dev(rdev);
141 if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
142 fattr->type = NFFIFO;
143 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
144 fattr->rdev = 0;
145 }
146 fattr->timestamp = jiffies;
147 return p;
148}
149
150#define SATTR(p, attr, flag, field) \
151 *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
152static inline u32 *
153xdr_encode_sattr(u32 *p, struct iattr *attr)
154{
155 SATTR(p, attr, ATTR_MODE, ia_mode);
156 SATTR(p, attr, ATTR_UID, ia_uid);
157 SATTR(p, attr, ATTR_GID, ia_gid);
158 SATTR(p, attr, ATTR_SIZE, ia_size);
159
160 if (attr->ia_valid & ATTR_ATIME_SET) {
161 p = xdr_encode_time(p, &attr->ia_atime);
162 } else if (attr->ia_valid & ATTR_ATIME) {
163 p = xdr_encode_current_server_time(p, &attr->ia_atime);
164 } else {
165 *p++ = ~(u32) 0;
166 *p++ = ~(u32) 0;
167 }
168
169 if (attr->ia_valid & ATTR_MTIME_SET) {
170 p = xdr_encode_time(p, &attr->ia_mtime);
171 } else if (attr->ia_valid & ATTR_MTIME) {
172 p = xdr_encode_current_server_time(p, &attr->ia_mtime);
173 } else {
174 *p++ = ~(u32) 0;
175 *p++ = ~(u32) 0;
176 }
177 return p;
178}
179#undef SATTR
180
181/*
182 * NFS encode functions
183 */
184/*
185 * Encode file handle argument
186 * GETATTR, READLINK, STATFS
187 */
188static int
189nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
190{
191 p = xdr_encode_fhandle(p, fh);
192 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
193 return 0;
194}
195
196/*
197 * Encode SETATTR arguments
198 */
199static int
200nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
201{
202 p = xdr_encode_fhandle(p, args->fh);
203 p = xdr_encode_sattr(p, args->sattr);
204 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
205 return 0;
206}
207
208/*
209 * Encode directory ops argument
210 * LOOKUP, REMOVE, RMDIR
211 */
212static int
213nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
214{
215 p = xdr_encode_fhandle(p, args->fh);
216 p = xdr_encode_array(p, args->name, args->len);
217 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
218 return 0;
219}
220
221/*
222 * Arguments to a READ call. Since we read data directly into the page
223 * cache, we also set up the reply iovec here so that iov[1] points
224 * exactly to the page we want to fetch.
225 */
226static int
227nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
228{
229 struct rpc_auth *auth = req->rq_task->tk_auth;
230 unsigned int replen;
231 u32 offset = (u32)args->offset;
232 u32 count = args->count;
233
234 p = xdr_encode_fhandle(p, args->fh);
235 *p++ = htonl(offset);
236 *p++ = htonl(count);
237 *p++ = htonl(count);
238 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
239
240 /* Inline the page array */
241 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
242 xdr_inline_pages(&req->rq_rcv_buf, replen,
243 args->pages, args->pgbase, count);
244 return 0;
245}
246
247/*
248 * Decode READ reply
249 */
250static int
251nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
252{
253 struct kvec *iov = req->rq_rcv_buf.head;
254 int status, count, recvd, hdrlen;
255
256 if ((status = ntohl(*p++)))
257 return -nfs_stat_to_errno(status);
258 p = xdr_decode_fattr(p, res->fattr);
259
260 count = ntohl(*p++);
261 res->eof = 0;
262 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
263 if (iov->iov_len < hdrlen) {
264 printk(KERN_WARNING "NFS: READ reply header overflowed:"
265 "length %d > %Zu\n", hdrlen, iov->iov_len);
266 return -errno_NFSERR_IO;
267 } else if (iov->iov_len != hdrlen) {
268 dprintk("NFS: READ header is short. iovec will be shifted.\n");
269 xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
270 }
271
272 recvd = req->rq_rcv_buf.len - hdrlen;
273 if (count > recvd) {
274 printk(KERN_WARNING "NFS: server cheating in read reply: "
275 "count %d > recvd %d\n", count, recvd);
276 count = recvd;
277 }
278
279 dprintk("RPC: readres OK count %d\n", count);
280 if (count < res->count)
281 res->count = count;
282
283 return count;
284}
285
286
287/*
288 * Write arguments. Splice the buffer to be written into the iovec.
289 */
290static int
291nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
292{
293 struct xdr_buf *sndbuf = &req->rq_snd_buf;
294 u32 offset = (u32)args->offset;
295 u32 count = args->count;
296
297 p = xdr_encode_fhandle(p, args->fh);
298 *p++ = htonl(offset);
299 *p++ = htonl(offset);
300 *p++ = htonl(count);
301 *p++ = htonl(count);
302 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
303
304 /* Copy the page array */
305 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
306 return 0;
307}
308
309/*
310 * Encode create arguments
311 * CREATE, MKDIR
312 */
313static int
314nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
315{
316 p = xdr_encode_fhandle(p, args->fh);
317 p = xdr_encode_array(p, args->name, args->len);
318 p = xdr_encode_sattr(p, args->sattr);
319 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
320 return 0;
321}
322
323/*
324 * Encode RENAME arguments
325 */
326static int
327nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
328{
329 p = xdr_encode_fhandle(p, args->fromfh);
330 p = xdr_encode_array(p, args->fromname, args->fromlen);
331 p = xdr_encode_fhandle(p, args->tofh);
332 p = xdr_encode_array(p, args->toname, args->tolen);
333 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
334 return 0;
335}
336
337/*
338 * Encode LINK arguments
339 */
340static int
341nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
342{
343 p = xdr_encode_fhandle(p, args->fromfh);
344 p = xdr_encode_fhandle(p, args->tofh);
345 p = xdr_encode_array(p, args->toname, args->tolen);
346 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
347 return 0;
348}
349
350/*
351 * Encode SYMLINK arguments
352 */
353static int
354nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
355{
356 p = xdr_encode_fhandle(p, args->fromfh);
357 p = xdr_encode_array(p, args->fromname, args->fromlen);
358 p = xdr_encode_array(p, args->topath, args->tolen);
359 p = xdr_encode_sattr(p, args->sattr);
360 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
361 return 0;
362}
363
364/*
365 * Encode arguments to readdir call
366 */
367static int
368nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
369{
370 struct rpc_task *task = req->rq_task;
371 struct rpc_auth *auth = task->tk_auth;
372 unsigned int replen;
373 u32 count = args->count;
374
375 p = xdr_encode_fhandle(p, args->fh);
376 *p++ = htonl(args->cookie);
377 *p++ = htonl(count); /* see above */
378 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
379
380 /* Inline the page array */
381 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
382 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
383 return 0;
384}
385
386/*
387 * Decode the result of a readdir call.
388 * We're not really decoding anymore, we just leave the buffer untouched
389 * and only check that it is syntactically correct.
390 * The real decoding happens in nfs_decode_entry below, called directly
391 * from nfs_readdir for each entry.
392 */
393static int
394nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
395{
396 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
397 struct kvec *iov = rcvbuf->head;
398 struct page **page;
399 int hdrlen, recvd;
400 int status, nr;
401 unsigned int len, pglen;
402 u32 *end, *entry, *kaddr;
403
404 if ((status = ntohl(*p++)))
405 return -nfs_stat_to_errno(status);
406
407 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
408 if (iov->iov_len < hdrlen) {
409 printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
410 "length %d > %Zu\n", hdrlen, iov->iov_len);
411 return -errno_NFSERR_IO;
412 } else if (iov->iov_len != hdrlen) {
413 dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
414 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
415 }
416
417 pglen = rcvbuf->page_len;
418 recvd = rcvbuf->len - hdrlen;
419 if (pglen > recvd)
420 pglen = recvd;
421 page = rcvbuf->pages;
422 kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
423 end = (u32 *)((char *)p + pglen);
424 entry = p;
425 for (nr = 0; *p++; nr++) {
426 if (p + 2 > end)
427 goto short_pkt;
428 p++; /* fileid */
429 len = ntohl(*p++);
430 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
431 if (len > NFS2_MAXNAMLEN) {
432 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
433 len);
434 goto err_unmap;
435 }
436 if (p + 2 > end)
437 goto short_pkt;
438 entry = p;
439 }
440 if (!nr && (entry[0] != 0 || entry[1] == 0))
441 goto short_pkt;
442 out:
443 kunmap_atomic(kaddr, KM_USER0);
444 return nr;
445 short_pkt:
446 entry[0] = entry[1] = 0;
447 /* truncate listing ? */
448 if (!nr) {
449 printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
450 entry[1] = 1;
451 }
452 goto out;
453err_unmap:
454 nr = -errno_NFSERR_IO;
455 goto out;
456}
457
458u32 *
459nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
460{
461 if (!*p++) {
462 if (!*p)
463 return ERR_PTR(-EAGAIN);
464 entry->eof = 1;
465 return ERR_PTR(-EBADCOOKIE);
466 }
467
468 entry->ino = ntohl(*p++);
469 entry->len = ntohl(*p++);
470 entry->name = (const char *) p;
471 p += XDR_QUADLEN(entry->len);
472 entry->prev_cookie = entry->cookie;
473 entry->cookie = ntohl(*p++);
474 entry->eof = !p[0] && p[1];
475
476 return p;
477}
478
479/*
480 * NFS XDR decode functions
481 */
482/*
483 * Decode simple status reply
484 */
485static int
486nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
487{
488 int status;
489
490 if ((status = ntohl(*p++)) != 0)
491 status = -nfs_stat_to_errno(status);
492 return status;
493}
494
495/*
496 * Decode attrstat reply
497 * GETATTR, SETATTR, WRITE
498 */
499static int
500nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
501{
502 int status;
503
504 if ((status = ntohl(*p++)))
505 return -nfs_stat_to_errno(status);
506 xdr_decode_fattr(p, fattr);
507 return 0;
508}
509
510/*
511 * Decode diropres reply
512 * LOOKUP, CREATE, MKDIR
513 */
514static int
515nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
516{
517 int status;
518
519 if ((status = ntohl(*p++)))
520 return -nfs_stat_to_errno(status);
521 p = xdr_decode_fhandle(p, res->fh);
522 xdr_decode_fattr(p, res->fattr);
523 return 0;
524}
525
526/*
527 * Encode READLINK args
528 */
529static int
530nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
531{
532 struct rpc_auth *auth = req->rq_task->tk_auth;
533 unsigned int replen;
534
535 p = xdr_encode_fhandle(p, args->fh);
536 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
537
538 /* Inline the page array */
539 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
540 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
541 return 0;
542}
543
544/*
545 * Decode READLINK reply
546 */
547static int
548nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
549{
550 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
551 struct kvec *iov = rcvbuf->head;
552 int hdrlen, len, recvd;
553 char *kaddr;
554 int status;
555
556 if ((status = ntohl(*p++)))
557 return -nfs_stat_to_errno(status);
558 /* Convert length of symlink */
559 len = ntohl(*p++);
560 if (len >= rcvbuf->page_len || len <= 0) {
561 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
562 return -ENAMETOOLONG;
563 }
564 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
565 if (iov->iov_len < hdrlen) {
566 printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
567 "length %d > %Zu\n", hdrlen, iov->iov_len);
568 return -errno_NFSERR_IO;
569 } else if (iov->iov_len != hdrlen) {
570 dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
571 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
572 }
573 recvd = req->rq_rcv_buf.len - hdrlen;
574 if (recvd < len) {
575 printk(KERN_WARNING "NFS: server cheating in readlink reply: "
576 "count %u > recvd %u\n", len, recvd);
577 return -EIO;
578 }
579
580 /* NULL terminate the string we got */
581 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
582 kaddr[len+rcvbuf->page_base] = '\0';
583 kunmap_atomic(kaddr, KM_USER0);
584 return 0;
585}
586
587/*
588 * Decode WRITE reply
589 */
590static int
591nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
592{
593 res->verf->committed = NFS_FILE_SYNC;
594 return nfs_xdr_attrstat(req, p, res->fattr);
595}
596
597/*
598 * Decode STATFS reply
599 */
600static int
601nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
602{
603 int status;
604
605 if ((status = ntohl(*p++)))
606 return -nfs_stat_to_errno(status);
607
608 res->tsize = ntohl(*p++);
609 res->bsize = ntohl(*p++);
610 res->blocks = ntohl(*p++);
611 res->bfree = ntohl(*p++);
612 res->bavail = ntohl(*p++);
613 return 0;
614}
615
616/*
617 * We need to translate between nfs status return values and
618 * the local errno values which may not be the same.
619 */
/*
 * Pairs of (NFS status code, local errno), scanned from the top by
 * nfs_stat_to_errno().  The errno values are stored as positive numbers.
 */
620static struct {
621 int stat;
622 int errno;
623} nfs_errtbl[] = {
624 { NFS_OK, 0 },
625 { NFSERR_PERM, EPERM },
626 { NFSERR_NOENT, ENOENT },
627 { NFSERR_IO, errno_NFSERR_IO },
628 { NFSERR_NXIO, ENXIO },
629/* { NFSERR_EAGAIN, EAGAIN }, */
630 { NFSERR_ACCES, EACCES },
631 { NFSERR_EXIST, EEXIST },
632 { NFSERR_XDEV, EXDEV },
633 { NFSERR_NODEV, ENODEV },
634 { NFSERR_NOTDIR, ENOTDIR },
635 { NFSERR_ISDIR, EISDIR },
636 { NFSERR_INVAL, EINVAL },
637 { NFSERR_FBIG, EFBIG },
638 { NFSERR_NOSPC, ENOSPC },
639 { NFSERR_ROFS, EROFS },
640 { NFSERR_MLINK, EMLINK },
641 { NFSERR_NAMETOOLONG, ENAMETOOLONG },
642 { NFSERR_NOTEMPTY, ENOTEMPTY },
643 { NFSERR_DQUOT, EDQUOT },
644 { NFSERR_STALE, ESTALE },
645 { NFSERR_REMOTE, EREMOTE },
646#ifdef EWFLUSH
647 { NFSERR_WFLUSH, EWFLUSH },
648#endif
649 { NFSERR_BADHANDLE, EBADHANDLE },
650 { NFSERR_NOT_SYNC, ENOTSYNC },
651 { NFSERR_BAD_COOKIE, EBADCOOKIE },
652 { NFSERR_NOTSUPP, ENOTSUPP },
653 { NFSERR_TOOSMALL, ETOOSMALL },
654 { NFSERR_SERVERFAULT, ESERVERFAULT },
655 { NFSERR_BADTYPE, EBADTYPE },
656 { NFSERR_JUKEBOX, EJUKEBOX },
/* Sentinel: stat == -1 ends the scan; its errno is used for unknown codes. */
657 { -1, EIO }
658};
659
660/*
661 * Convert an NFS error code to a local one.
662 * This one is used jointly by NFSv2 and NFSv3.
663 */
664int
665nfs_stat_to_errno(int stat)
666{
667 int i;
668
669 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
670 if (nfs_errtbl[i].stat == stat)
671 return nfs_errtbl[i].errno;
672 }
673 printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
674 return nfs_errtbl[i].errno;
675}
676
677#ifndef MAX
678# define MAX(a, b) (((a) > (b))? (a) : (b))
679#endif
680
/*
 * PROC(proc, argtype, restype, timer) builds the table entry at index
 * NFSPROC_<proc>: the encoder is nfs_xdr_<argtype>, the decoder is
 * nfs_xdr_<restype>, and the buffer size is the larger of the two
 * NFS_<type>_sz word counts converted to bytes (<< 2).  The last
 * argument is stored in p_timer.
 */
681#define PROC(proc, argtype, restype, timer) \
682[NFSPROC_##proc] = { \
683 .p_proc = NFSPROC_##proc, \
684 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
685 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
686 .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
687 .p_timer = timer \
688 }
/* NFSv2 procedure table, one entry per procedure implemented in this file. */
689struct rpc_procinfo nfs_procedures[] = {
690 PROC(GETATTR, fhandle, attrstat, 1),
691 PROC(SETATTR, sattrargs, attrstat, 0),
692 PROC(LOOKUP, diropargs, diropres, 2),
693 PROC(READLINK, readlinkargs, readlinkres, 3),
694 PROC(READ, readargs, readres, 3),
695 PROC(WRITE, writeargs, writeres, 4),
696 PROC(CREATE, createargs, diropres, 0),
697 PROC(REMOVE, diropargs, stat, 0),
698 PROC(RENAME, renameargs, stat, 0),
699 PROC(LINK, linkargs, stat, 0),
700 PROC(SYMLINK, symlinkargs, stat, 0),
701 PROC(MKDIR, createargs, diropres, 0),
702 PROC(RMDIR, diropargs, stat, 0),
703 PROC(READDIR, readdirargs, readdirres, 3),
704 PROC(STATFS, fhandle, statfsres, 0),
705};
706
707struct rpc_version nfs_version2 = {
708 .number = 2,
709 .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
710 .procs = nfs_procedures
711};
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
new file mode 100644
index 000000000000..3878494dfc2c
--- /dev/null
+++ b/fs/nfs/nfs3proc.c
@@ -0,0 +1,859 @@
1/*
2 * linux/fs/nfs/nfs3proc.c
3 *
4 * Client-side NFSv3 procedures stubs.
5 *
6 * Copyright (C) 1997, Olaf Kirch
7 */
8
9#include <linux/mm.h>
10#include <linux/utsname.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/sunrpc/clnt.h>
14#include <linux/nfs.h>
15#include <linux/nfs3.h>
16#include <linux/nfs_fs.h>
17#include <linux/nfs_page.h>
18#include <linux/lockd/bind.h>
19#include <linux/smp_lock.h>
20
21#define NFSDBG_FACILITY NFSDBG_PROC
22
23extern struct rpc_procinfo nfs3_procedures[];
24
25/* A wrapper to handle the EJUKEBOX error message */
26static int
27nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
28{
29 sigset_t oldset;
30 int res;
31 rpc_clnt_sigmask(clnt, &oldset);
32 do {
33 res = rpc_call_sync(clnt, msg, flags);
34 if (res != -EJUKEBOX)
35 break;
36 set_current_state(TASK_INTERRUPTIBLE);
37 schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
38 res = -ERESTARTSYS;
39 } while (!signalled());
40 rpc_clnt_sigunmask(clnt, &oldset);
41 return res;
42}
43
44static inline int
45nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
46{
47 struct rpc_message msg = {
48 .rpc_proc = &nfs3_procedures[proc],
49 .rpc_argp = argp,
50 .rpc_resp = resp,
51 };
52 return nfs3_rpc_wrapper(clnt, &msg, flags);
53}
54
55#define rpc_call(clnt, proc, argp, resp, flags) \
56 nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
57#define rpc_call_sync(clnt, msg, flags) \
58 nfs3_rpc_wrapper(clnt, msg, flags)
59
60static int
61nfs3_async_handle_jukebox(struct rpc_task *task)
62{
63 if (task->tk_status != -EJUKEBOX)
64 return 0;
65 task->tk_status = 0;
66 rpc_restart_call(task);
67 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
68 return 1;
69}
70
71/*
72 * Bare-bones access to getattr: this is for nfs_read_super.
73 */
74static int
75nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
76 struct nfs_fsinfo *info)
77{
78 int status;
79
80 dprintk("%s: call fsinfo\n", __FUNCTION__);
81 info->fattr->valid = 0;
82 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
83 dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
84 if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
85 status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
86 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
87 }
88 return status;
89}
90
91/*
92 * One function for each procedure in the NFS protocol.
93 */
94static int
95nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
96 struct nfs_fattr *fattr)
97{
98 int status;
99
100 dprintk("NFS call getattr\n");
101 fattr->valid = 0;
102 status = rpc_call(server->client, NFS3PROC_GETATTR,
103 fhandle, fattr, 0);
104 dprintk("NFS reply getattr: %d\n", status);
105 return status;
106}
107
108static int
109nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
110 struct iattr *sattr)
111{
112 struct inode *inode = dentry->d_inode;
113 struct nfs3_sattrargs arg = {
114 .fh = NFS_FH(inode),
115 .sattr = sattr,
116 };
117 int status;
118
119 dprintk("NFS call setattr\n");
120 fattr->valid = 0;
121 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
122 dprintk("NFS reply setattr: %d\n", status);
123 return status;
124}
125
126static int
127nfs3_proc_lookup(struct inode *dir, struct qstr *name,
128 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
129{
130 struct nfs_fattr dir_attr;
131 struct nfs3_diropargs arg = {
132 .fh = NFS_FH(dir),
133 .name = name->name,
134 .len = name->len
135 };
136 struct nfs3_diropres res = {
137 .dir_attr = &dir_attr,
138 .fh = fhandle,
139 .fattr = fattr
140 };
141 int status;
142
143 dprintk("NFS call lookup %s\n", name->name);
144 dir_attr.valid = 0;
145 fattr->valid = 0;
146 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
147 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
148 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
149 fhandle, fattr, 0);
150 dprintk("NFS reply lookup: %d\n", status);
151 if (status >= 0)
152 status = nfs_refresh_inode(dir, &dir_attr);
153 return status;
154}
155
156static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
157{
158 struct nfs_fattr fattr;
159 struct nfs3_accessargs arg = {
160 .fh = NFS_FH(inode),
161 };
162 struct nfs3_accessres res = {
163 .fattr = &fattr,
164 };
165 struct rpc_message msg = {
166 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
167 .rpc_argp = &arg,
168 .rpc_resp = &res,
169 .rpc_cred = entry->cred
170 };
171 int mode = entry->mask;
172 int status;
173
174 dprintk("NFS call access\n");
175 fattr.valid = 0;
176
177 if (mode & MAY_READ)
178 arg.access |= NFS3_ACCESS_READ;
179 if (S_ISDIR(inode->i_mode)) {
180 if (mode & MAY_WRITE)
181 arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
182 if (mode & MAY_EXEC)
183 arg.access |= NFS3_ACCESS_LOOKUP;
184 } else {
185 if (mode & MAY_WRITE)
186 arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
187 if (mode & MAY_EXEC)
188 arg.access |= NFS3_ACCESS_EXECUTE;
189 }
190 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
191 nfs_refresh_inode(inode, &fattr);
192 if (status == 0) {
193 entry->mask = 0;
194 if (res.access & NFS3_ACCESS_READ)
195 entry->mask |= MAY_READ;
196 if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
197 entry->mask |= MAY_WRITE;
198 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
199 entry->mask |= MAY_EXEC;
200 }
201 dprintk("NFS reply access: %d\n", status);
202 return status;
203}
204
205static int nfs3_proc_readlink(struct inode *inode, struct page *page,
206 unsigned int pgbase, unsigned int pglen)
207{
208 struct nfs_fattr fattr;
209 struct nfs3_readlinkargs args = {
210 .fh = NFS_FH(inode),
211 .pgbase = pgbase,
212 .pglen = pglen,
213 .pages = &page
214 };
215 int status;
216
217 dprintk("NFS call readlink\n");
218 fattr.valid = 0;
219 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
220 &args, &fattr, 0);
221 nfs_refresh_inode(inode, &fattr);
222 dprintk("NFS reply readlink: %d\n", status);
223 return status;
224}
225
226static int nfs3_proc_read(struct nfs_read_data *rdata)
227{
228 int flags = rdata->flags;
229 struct inode * inode = rdata->inode;
230 struct nfs_fattr * fattr = rdata->res.fattr;
231 struct rpc_message msg = {
232 .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
233 .rpc_argp = &rdata->args,
234 .rpc_resp = &rdata->res,
235 .rpc_cred = rdata->cred,
236 };
237 int status;
238
239 dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
240 (long long) rdata->args.offset);
241 fattr->valid = 0;
242 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
243 if (status >= 0)
244 nfs_refresh_inode(inode, fattr);
245 dprintk("NFS reply read: %d\n", status);
246 return status;
247}
248
249static int nfs3_proc_write(struct nfs_write_data *wdata)
250{
251 int rpcflags = wdata->flags;
252 struct inode * inode = wdata->inode;
253 struct nfs_fattr * fattr = wdata->res.fattr;
254 struct rpc_message msg = {
255 .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
256 .rpc_argp = &wdata->args,
257 .rpc_resp = &wdata->res,
258 .rpc_cred = wdata->cred,
259 };
260 int status;
261
262 dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
263 (long long) wdata->args.offset);
264 fattr->valid = 0;
265 status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
266 if (status >= 0)
267 nfs_refresh_inode(inode, fattr);
268 dprintk("NFS reply write: %d\n", status);
269 return status < 0? status : wdata->res.count;
270}
271
272static int nfs3_proc_commit(struct nfs_write_data *cdata)
273{
274 struct inode * inode = cdata->inode;
275 struct nfs_fattr * fattr = cdata->res.fattr;
276 struct rpc_message msg = {
277 .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
278 .rpc_argp = &cdata->args,
279 .rpc_resp = &cdata->res,
280 .rpc_cred = cdata->cred,
281 };
282 int status;
283
284 dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
285 (long long) cdata->args.offset);
286 fattr->valid = 0;
287 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
288 if (status >= 0)
289 nfs_refresh_inode(inode, fattr);
290 dprintk("NFS reply commit: %d\n", status);
291 return status;
292}
293
294/*
295 * Create a regular file.
296 * For now, we don't implement O_EXCL.
297 */
298static int
299nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
300 int flags)
301{
302 struct nfs_fh fhandle;
303 struct nfs_fattr fattr;
304 struct nfs_fattr dir_attr;
305 struct nfs3_createargs arg = {
306 .fh = NFS_FH(dir),
307 .name = dentry->d_name.name,
308 .len = dentry->d_name.len,
309 .sattr = sattr,
310 };
311 struct nfs3_diropres res = {
312 .dir_attr = &dir_attr,
313 .fh = &fhandle,
314 .fattr = &fattr
315 };
316 int status;
317
318 dprintk("NFS call create %s\n", dentry->d_name.name);
319 arg.createmode = NFS3_CREATE_UNCHECKED;
320 if (flags & O_EXCL) {
321 arg.createmode = NFS3_CREATE_EXCLUSIVE;
322 arg.verifier[0] = jiffies;
323 arg.verifier[1] = current->pid;
324 }
325
326again:
327 dir_attr.valid = 0;
328 fattr.valid = 0;
329 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
330 nfs_refresh_inode(dir, &dir_attr);
331
332 /* If the server doesn't support the exclusive creation semantics,
333 * try again with simple 'guarded' mode. */
334 if (status == NFSERR_NOTSUPP) {
335 switch (arg.createmode) {
336 case NFS3_CREATE_EXCLUSIVE:
337 arg.createmode = NFS3_CREATE_GUARDED;
338 break;
339
340 case NFS3_CREATE_GUARDED:
341 arg.createmode = NFS3_CREATE_UNCHECKED;
342 break;
343
344 case NFS3_CREATE_UNCHECKED:
345 goto out;
346 }
347 goto again;
348 }
349
350 if (status == 0)
351 status = nfs_instantiate(dentry, &fhandle, &fattr);
352 if (status != 0)
353 goto out;
354
355 /* When we created the file with exclusive semantics, make
356 * sure we set the attributes afterwards. */
357 if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
358 dprintk("NFS call setattr (post-create)\n");
359
360 if (!(sattr->ia_valid & ATTR_ATIME_SET))
361 sattr->ia_valid |= ATTR_ATIME;
362 if (!(sattr->ia_valid & ATTR_MTIME_SET))
363 sattr->ia_valid |= ATTR_MTIME;
364
365 /* Note: we could use a guarded setattr here, but I'm
366 * not sure this buys us anything (and I'd have
367 * to revamp the NFSv3 XDR code) */
368 status = nfs3_proc_setattr(dentry, &fattr, sattr);
369 nfs_refresh_inode(dentry->d_inode, &fattr);
370 dprintk("NFS reply setattr (post-create): %d\n", status);
371 }
372out:
373 dprintk("NFS reply create: %d\n", status);
374 return status;
375}
376
377static int
378nfs3_proc_remove(struct inode *dir, struct qstr *name)
379{
380 struct nfs_fattr dir_attr;
381 struct nfs3_diropargs arg = {
382 .fh = NFS_FH(dir),
383 .name = name->name,
384 .len = name->len
385 };
386 struct rpc_message msg = {
387 .rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE],
388 .rpc_argp = &arg,
389 .rpc_resp = &dir_attr,
390 };
391 int status;
392
393 dprintk("NFS call remove %s\n", name->name);
394 dir_attr.valid = 0;
395 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
396 nfs_refresh_inode(dir, &dir_attr);
397 dprintk("NFS reply remove: %d\n", status);
398 return status;
399}
400
401static int
402nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
403{
404 struct unlinkxdr {
405 struct nfs3_diropargs arg;
406 struct nfs_fattr res;
407 } *ptr;
408
409 ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL);
410 if (!ptr)
411 return -ENOMEM;
412 ptr->arg.fh = NFS_FH(dir->d_inode);
413 ptr->arg.name = name->name;
414 ptr->arg.len = name->len;
415 ptr->res.valid = 0;
416 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
417 msg->rpc_argp = &ptr->arg;
418 msg->rpc_resp = &ptr->res;
419 return 0;
420}
421
422static int
423nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
424{
425 struct rpc_message *msg = &task->tk_msg;
426 struct nfs_fattr *dir_attr;
427
428 if (nfs3_async_handle_jukebox(task))
429 return 1;
430 if (msg->rpc_argp) {
431 dir_attr = (struct nfs_fattr*)msg->rpc_resp;
432 nfs_refresh_inode(dir->d_inode, dir_attr);
433 kfree(msg->rpc_argp);
434 }
435 return 0;
436}
437
438static int
439nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
440 struct inode *new_dir, struct qstr *new_name)
441{
442 struct nfs_fattr old_dir_attr, new_dir_attr;
443 struct nfs3_renameargs arg = {
444 .fromfh = NFS_FH(old_dir),
445 .fromname = old_name->name,
446 .fromlen = old_name->len,
447 .tofh = NFS_FH(new_dir),
448 .toname = new_name->name,
449 .tolen = new_name->len
450 };
451 struct nfs3_renameres res = {
452 .fromattr = &old_dir_attr,
453 .toattr = &new_dir_attr
454 };
455 int status;
456
457 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
458 old_dir_attr.valid = 0;
459 new_dir_attr.valid = 0;
460 status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
461 nfs_refresh_inode(old_dir, &old_dir_attr);
462 nfs_refresh_inode(new_dir, &new_dir_attr);
463 dprintk("NFS reply rename: %d\n", status);
464 return status;
465}
466
467static int
468nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
469{
470 struct nfs_fattr dir_attr, fattr;
471 struct nfs3_linkargs arg = {
472 .fromfh = NFS_FH(inode),
473 .tofh = NFS_FH(dir),
474 .toname = name->name,
475 .tolen = name->len
476 };
477 struct nfs3_linkres res = {
478 .dir_attr = &dir_attr,
479 .fattr = &fattr
480 };
481 int status;
482
483 dprintk("NFS call link %s\n", name->name);
484 dir_attr.valid = 0;
485 fattr.valid = 0;
486 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
487 nfs_refresh_inode(dir, &dir_attr);
488 nfs_refresh_inode(inode, &fattr);
489 dprintk("NFS reply link: %d\n", status);
490 return status;
491}
492
493static int
494nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
495 struct iattr *sattr, struct nfs_fh *fhandle,
496 struct nfs_fattr *fattr)
497{
498 struct nfs_fattr dir_attr;
499 struct nfs3_symlinkargs arg = {
500 .fromfh = NFS_FH(dir),
501 .fromname = name->name,
502 .fromlen = name->len,
503 .topath = path->name,
504 .tolen = path->len,
505 .sattr = sattr
506 };
507 struct nfs3_diropres res = {
508 .dir_attr = &dir_attr,
509 .fh = fhandle,
510 .fattr = fattr
511 };
512 int status;
513
514 if (path->len > NFS3_MAXPATHLEN)
515 return -ENAMETOOLONG;
516 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
517 dir_attr.valid = 0;
518 fattr->valid = 0;
519 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
520 nfs_refresh_inode(dir, &dir_attr);
521 dprintk("NFS reply symlink: %d\n", status);
522 return status;
523}
524
525static int
526nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
527{
528 struct nfs_fh fhandle;
529 struct nfs_fattr fattr, dir_attr;
530 struct nfs3_mkdirargs arg = {
531 .fh = NFS_FH(dir),
532 .name = dentry->d_name.name,
533 .len = dentry->d_name.len,
534 .sattr = sattr
535 };
536 struct nfs3_diropres res = {
537 .dir_attr = &dir_attr,
538 .fh = &fhandle,
539 .fattr = &fattr
540 };
541 int status;
542
543 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
544 dir_attr.valid = 0;
545 fattr.valid = 0;
546 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
547 nfs_refresh_inode(dir, &dir_attr);
548 if (status == 0)
549 status = nfs_instantiate(dentry, &fhandle, &fattr);
550 dprintk("NFS reply mkdir: %d\n", status);
551 return status;
552}
553
554static int
555nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
556{
557 struct nfs_fattr dir_attr;
558 struct nfs3_diropargs arg = {
559 .fh = NFS_FH(dir),
560 .name = name->name,
561 .len = name->len
562 };
563 int status;
564
565 dprintk("NFS call rmdir %s\n", name->name);
566 dir_attr.valid = 0;
567 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
568 nfs_refresh_inode(dir, &dir_attr);
569 dprintk("NFS reply rmdir: %d\n", status);
570 return status;
571}
572
573/*
574 * The READDIR implementation is somewhat hackish - we pass the user buffer
575 * to the encode function, which installs it in the receive iovec.
576 * The decode function itself doesn't perform any decoding, it just makes
577 * sure the reply is syntactically correct.
578 *
579 * Also note that this implementation handles both plain readdir and
580 * readdirplus.
581 */
582static int
583nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
584 u64 cookie, struct page *page, unsigned int count, int plus)
585{
586 struct inode *dir = dentry->d_inode;
587 struct nfs_fattr dir_attr;
588 u32 *verf = NFS_COOKIEVERF(dir);
589 struct nfs3_readdirargs arg = {
590 .fh = NFS_FH(dir),
591 .cookie = cookie,
592 .verf = {verf[0], verf[1]},
593 .plus = plus,
594 .count = count,
595 .pages = &page
596 };
597 struct nfs3_readdirres res = {
598 .dir_attr = &dir_attr,
599 .verf = verf,
600 .plus = plus
601 };
602 struct rpc_message msg = {
603 .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
604 .rpc_argp = &arg,
605 .rpc_resp = &res,
606 .rpc_cred = cred
607 };
608 int status;
609
610 lock_kernel();
611
612 if (plus)
613 msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
614
615 dprintk("NFS call readdir%s %d\n",
616 plus? "plus" : "", (unsigned int) cookie);
617
618 dir_attr.valid = 0;
619 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
620 nfs_refresh_inode(dir, &dir_attr);
621 dprintk("NFS reply readdir: %d\n", status);
622 unlock_kernel();
623 return status;
624}
625
626static int
627nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
628 dev_t rdev)
629{
630 struct nfs_fh fh;
631 struct nfs_fattr fattr, dir_attr;
632 struct nfs3_mknodargs arg = {
633 .fh = NFS_FH(dir),
634 .name = dentry->d_name.name,
635 .len = dentry->d_name.len,
636 .sattr = sattr,
637 .rdev = rdev
638 };
639 struct nfs3_diropres res = {
640 .dir_attr = &dir_attr,
641 .fh = &fh,
642 .fattr = &fattr
643 };
644 int status;
645
646 switch (sattr->ia_mode & S_IFMT) {
647 case S_IFBLK: arg.type = NF3BLK; break;
648 case S_IFCHR: arg.type = NF3CHR; break;
649 case S_IFIFO: arg.type = NF3FIFO; break;
650 case S_IFSOCK: arg.type = NF3SOCK; break;
651 default: return -EINVAL;
652 }
653
654 dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
655 MAJOR(rdev), MINOR(rdev));
656 dir_attr.valid = 0;
657 fattr.valid = 0;
658 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
659 nfs_refresh_inode(dir, &dir_attr);
660 if (status == 0)
661 status = nfs_instantiate(dentry, &fh, &fattr);
662 dprintk("NFS reply mknod: %d\n", status);
663 return status;
664}
665
666static int
667nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
668 struct nfs_fsstat *stat)
669{
670 int status;
671
672 dprintk("NFS call fsstat\n");
673 stat->fattr->valid = 0;
674 status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
675 dprintk("NFS reply statfs: %d\n", status);
676 return status;
677}
678
679static int
680nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
681 struct nfs_fsinfo *info)
682{
683 int status;
684
685 dprintk("NFS call fsinfo\n");
686 info->fattr->valid = 0;
687 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
688 dprintk("NFS reply fsinfo: %d\n", status);
689 return status;
690}
691
692static int
693nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
694 struct nfs_pathconf *info)
695{
696 int status;
697
698 dprintk("NFS call pathconf\n");
699 info->fattr->valid = 0;
700 status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
701 dprintk("NFS reply pathconf: %d\n", status);
702 return status;
703}
704
705extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
706
707static void
708nfs3_read_done(struct rpc_task *task)
709{
710 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
711
712 if (nfs3_async_handle_jukebox(task))
713 return;
714 /* Call back common NFS readpage processing */
715 if (task->tk_status >= 0)
716 nfs_refresh_inode(data->inode, &data->fattr);
717 nfs_readpage_result(task);
718}
719
720static void
721nfs3_proc_read_setup(struct nfs_read_data *data)
722{
723 struct rpc_task *task = &data->task;
724 struct inode *inode = data->inode;
725 int flags;
726 struct rpc_message msg = {
727 .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
728 .rpc_argp = &data->args,
729 .rpc_resp = &data->res,
730 .rpc_cred = data->cred,
731 };
732
733 /* N.B. Do we need to test? Never called for swapfile inode */
734 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
735
736 /* Finalize the task. */
737 rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
738 rpc_call_setup(task, &msg, 0);
739}
740
741static void
742nfs3_write_done(struct rpc_task *task)
743{
744 struct nfs_write_data *data;
745
746 if (nfs3_async_handle_jukebox(task))
747 return;
748 data = (struct nfs_write_data *)task->tk_calldata;
749 if (task->tk_status >= 0)
750 nfs_refresh_inode(data->inode, data->res.fattr);
751 nfs_writeback_done(task);
752}
753
754static void
755nfs3_proc_write_setup(struct nfs_write_data *data, int how)
756{
757 struct rpc_task *task = &data->task;
758 struct inode *inode = data->inode;
759 int stable;
760 int flags;
761 struct rpc_message msg = {
762 .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
763 .rpc_argp = &data->args,
764 .rpc_resp = &data->res,
765 .rpc_cred = data->cred,
766 };
767
768 if (how & FLUSH_STABLE) {
769 if (!NFS_I(inode)->ncommit)
770 stable = NFS_FILE_SYNC;
771 else
772 stable = NFS_DATA_SYNC;
773 } else
774 stable = NFS_UNSTABLE;
775 data->args.stable = stable;
776
777 /* Set the initial flags for the task. */
778 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
779
780 /* Finalize the task. */
781 rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
782 rpc_call_setup(task, &msg, 0);
783}
784
785static void
786nfs3_commit_done(struct rpc_task *task)
787{
788 struct nfs_write_data *data;
789
790 if (nfs3_async_handle_jukebox(task))
791 return;
792 data = (struct nfs_write_data *)task->tk_calldata;
793 if (task->tk_status >= 0)
794 nfs_refresh_inode(data->inode, data->res.fattr);
795 nfs_commit_done(task);
796}
797
798static void
799nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
800{
801 struct rpc_task *task = &data->task;
802 struct inode *inode = data->inode;
803 int flags;
804 struct rpc_message msg = {
805 .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
806 .rpc_argp = &data->args,
807 .rpc_resp = &data->res,
808 .rpc_cred = data->cred,
809 };
810
811 /* Set the initial flags for the task. */
812 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
813
814 /* Finalize the task. */
815 rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
816 rpc_call_setup(task, &msg, 0);
817}
818
819static int
820nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
821{
822 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
823}
824
825struct nfs_rpc_ops nfs_v3_clientops = {
826 .version = 3, /* protocol version */
827 .dentry_ops = &nfs_dentry_operations,
828 .dir_inode_ops = &nfs_dir_inode_operations,
829 .getroot = nfs3_proc_get_root,
830 .getattr = nfs3_proc_getattr,
831 .setattr = nfs3_proc_setattr,
832 .lookup = nfs3_proc_lookup,
833 .access = nfs3_proc_access,
834 .readlink = nfs3_proc_readlink,
835 .read = nfs3_proc_read,
836 .write = nfs3_proc_write,
837 .commit = nfs3_proc_commit,
838 .create = nfs3_proc_create,
839 .remove = nfs3_proc_remove,
840 .unlink_setup = nfs3_proc_unlink_setup,
841 .unlink_done = nfs3_proc_unlink_done,
842 .rename = nfs3_proc_rename,
843 .link = nfs3_proc_link,
844 .symlink = nfs3_proc_symlink,
845 .mkdir = nfs3_proc_mkdir,
846 .rmdir = nfs3_proc_rmdir,
847 .readdir = nfs3_proc_readdir,
848 .mknod = nfs3_proc_mknod,
849 .statfs = nfs3_proc_statfs,
850 .fsinfo = nfs3_proc_fsinfo,
851 .pathconf = nfs3_proc_pathconf,
852 .decode_dirent = nfs3_decode_dirent,
853 .read_setup = nfs3_proc_read_setup,
854 .write_setup = nfs3_proc_write_setup,
855 .commit_setup = nfs3_proc_commit_setup,
856 .file_open = nfs_open,
857 .file_release = nfs_release,
858 .lock = nfs3_proc_lock,
859};
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
new file mode 100644
index 000000000000..a3593d47e5ab
--- /dev/null
+++ b/fs/nfs/nfs3xdr.c
@@ -0,0 +1,1023 @@
1/*
2 * linux/fs/nfs/nfs3xdr.c
3 *
4 * XDR functions to encode/decode NFSv3 RPC arguments and results.
5 *
6 * Copyright (C) 1996, 1997 Olaf Kirch
7 */
8
9#include <linux/param.h>
10#include <linux/time.h>
11#include <linux/mm.h>
12#include <linux/slab.h>
13#include <linux/utsname.h>
14#include <linux/errno.h>
15#include <linux/string.h>
16#include <linux/in.h>
17#include <linux/pagemap.h>
18#include <linux/proc_fs.h>
19#include <linux/kdev_t.h>
20#include <linux/sunrpc/clnt.h>
21#include <linux/nfs.h>
22#include <linux/nfs3.h>
23#include <linux/nfs_fs.h>
24
25#define NFSDBG_FACILITY NFSDBG_XDR
26
27/* Mapping from NFS error code to "errno" error code. */
28#define errno_NFSERR_IO EIO
29
30extern int nfs_stat_to_errno(int);
31
/*
 * Declare the space requirements for NFS arguments and replies as
 * number of 32bit-words
 *
 * NFS3_fsstat_sz, NFS3_fsinfo_sz and NFS3_pathconf_sz used to be defined
 * with an empty expansion, which would turn any arithmetic use into a
 * syntax error.  They now carry the sizes of the fixed reply payloads
 * that were previously hard-coded in the corresponding *res_sz macros,
 * so every *res_sz value is unchanged.
 */
#define NFS3_fhandle_sz		(1+16)
#define NFS3_fh_sz		(NFS3_fhandle_sz)	/* shorthand */
#define NFS3_sattr_sz		(15)
#define NFS3_filename_sz	(1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz		(1+(NFS3_MAXPATHLEN>>2))
#define NFS3_fattr_sz		(21)
#define NFS3_wcc_attr_sz		(6)
#define NFS3_pre_op_attr_sz	(1+NFS3_wcc_attr_sz)
#define NFS3_post_op_attr_sz	(1+NFS3_fattr_sz)
#define NFS3_wcc_data_sz		(NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
#define NFS3_fsstat_sz		(13)	/* payload words following post_op_attr */
#define NFS3_fsinfo_sz		(12)	/* payload words following post_op_attr */
#define NFS3_pathconf_sz	(6)	/* payload words following post_op_attr */
#define NFS3_entry_sz		(NFS3_filename_sz+3)

#define NFS3_sattrargs_sz	(NFS3_fh_sz+NFS3_sattr_sz+3)
#define NFS3_diropargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_accessargs_sz	(NFS3_fh_sz+1)
#define NFS3_readlinkargs_sz	(NFS3_fh_sz)
#define NFS3_readargs_sz	(NFS3_fh_sz+3)
#define NFS3_writeargs_sz	(NFS3_fh_sz+5)
#define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
#define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
#define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
#define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
#define NFS3_readdirargs_sz	(NFS3_fh_sz+2)
#define NFS3_commitargs_sz	(NFS3_fh_sz+3)

#define NFS3_attrstat_sz	(1+NFS3_fattr_sz)
#define NFS3_wccstat_sz		(1+NFS3_wcc_data_sz)
#define NFS3_lookupres_sz	(1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz	(1+NFS3_post_op_attr_sz+1)
#define NFS3_readlinkres_sz	(1+NFS3_post_op_attr_sz+1)
#define NFS3_readres_sz		(1+NFS3_post_op_attr_sz+3)
#define NFS3_writeres_sz	(1+NFS3_wcc_data_sz+4)
#define NFS3_createres_sz	(1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz	(1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz		(1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz	(1+NFS3_post_op_attr_sz+2)
#define NFS3_fsstatres_sz	(1+NFS3_post_op_attr_sz+NFS3_fsstat_sz)
#define NFS3_fsinfores_sz	(1+NFS3_post_op_attr_sz+NFS3_fsinfo_sz)
#define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+NFS3_pathconf_sz)
#define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
81
82/*
83 * Map file type to S_IFMT bits
84 */
85static struct {
86 unsigned int mode;
87 unsigned int nfs2type;
88} nfs_type2fmt[] = {
89 { 0, NFNON },
90 { S_IFREG, NFREG },
91 { S_IFDIR, NFDIR },
92 { S_IFBLK, NFBLK },
93 { S_IFCHR, NFCHR },
94 { S_IFLNK, NFLNK },
95 { S_IFSOCK, NFSOCK },
96 { S_IFIFO, NFFIFO },
97 { 0, NFBAD }
98};
99
100/*
101 * Common NFS XDR functions as inlines
102 */
103static inline u32 *
104xdr_encode_fhandle(u32 *p, struct nfs_fh *fh)
105{
106 return xdr_encode_array(p, fh->data, fh->size);
107}
108
109static inline u32 *
110xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
111{
112 if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
113 memcpy(fh->data, p, fh->size);
114 return p + XDR_QUADLEN(fh->size);
115 }
116 return NULL;
117}
118
119/*
120 * Encode/decode time.
121 */
122static inline u32 *
123xdr_encode_time3(u32 *p, struct timespec *timep)
124{
125 *p++ = htonl(timep->tv_sec);
126 *p++ = htonl(timep->tv_nsec);
127 return p;
128}
129
130static inline u32 *
131xdr_decode_time3(u32 *p, struct timespec *timep)
132{
133 timep->tv_sec = ntohl(*p++);
134 timep->tv_nsec = ntohl(*p++);
135 return p;
136}
137
138static u32 *
139xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
140{
141 unsigned int type, major, minor;
142 int fmode;
143
144 type = ntohl(*p++);
145 if (type >= NF3BAD)
146 type = NF3BAD;
147 fmode = nfs_type2fmt[type].mode;
148 fattr->type = nfs_type2fmt[type].nfs2type;
149 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
150 fattr->nlink = ntohl(*p++);
151 fattr->uid = ntohl(*p++);
152 fattr->gid = ntohl(*p++);
153 p = xdr_decode_hyper(p, &fattr->size);
154 p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
155
156 /* Turn remote device info into Linux-specific dev_t */
157 major = ntohl(*p++);
158 minor = ntohl(*p++);
159 fattr->rdev = MKDEV(major, minor);
160 if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
161 fattr->rdev = 0;
162
163 p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3);
164 p = xdr_decode_hyper(p, &fattr->fileid);
165 p = xdr_decode_time3(p, &fattr->atime);
166 p = xdr_decode_time3(p, &fattr->mtime);
167 p = xdr_decode_time3(p, &fattr->ctime);
168
169 /* Update the mode bits */
170 fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
171 fattr->timestamp = jiffies;
172 return p;
173}
174
175static inline u32 *
176xdr_encode_sattr(u32 *p, struct iattr *attr)
177{
178 if (attr->ia_valid & ATTR_MODE) {
179 *p++ = xdr_one;
180 *p++ = htonl(attr->ia_mode);
181 } else {
182 *p++ = xdr_zero;
183 }
184 if (attr->ia_valid & ATTR_UID) {
185 *p++ = xdr_one;
186 *p++ = htonl(attr->ia_uid);
187 } else {
188 *p++ = xdr_zero;
189 }
190 if (attr->ia_valid & ATTR_GID) {
191 *p++ = xdr_one;
192 *p++ = htonl(attr->ia_gid);
193 } else {
194 *p++ = xdr_zero;
195 }
196 if (attr->ia_valid & ATTR_SIZE) {
197 *p++ = xdr_one;
198 p = xdr_encode_hyper(p, (__u64) attr->ia_size);
199 } else {
200 *p++ = xdr_zero;
201 }
202 if (attr->ia_valid & ATTR_ATIME_SET) {
203 *p++ = xdr_two;
204 p = xdr_encode_time3(p, &attr->ia_atime);
205 } else if (attr->ia_valid & ATTR_ATIME) {
206 *p++ = xdr_one;
207 } else {
208 *p++ = xdr_zero;
209 }
210 if (attr->ia_valid & ATTR_MTIME_SET) {
211 *p++ = xdr_two;
212 p = xdr_encode_time3(p, &attr->ia_mtime);
213 } else if (attr->ia_valid & ATTR_MTIME) {
214 *p++ = xdr_one;
215 } else {
216 *p++ = xdr_zero;
217 }
218 return p;
219}
220
221static inline u32 *
222xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr)
223{
224 p = xdr_decode_hyper(p, &fattr->pre_size);
225 p = xdr_decode_time3(p, &fattr->pre_mtime);
226 p = xdr_decode_time3(p, &fattr->pre_ctime);
227 fattr->valid |= NFS_ATTR_WCC;
228 return p;
229}
230
231static inline u32 *
232xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr)
233{
234 if (*p++)
235 p = xdr_decode_fattr(p, fattr);
236 return p;
237}
238
239static inline u32 *
240xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr)
241{
242 if (*p++)
243 return xdr_decode_wcc_attr(p, fattr);
244 return p;
245}
246
247
248static inline u32 *
249xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
250{
251 p = xdr_decode_pre_op_attr(p, fattr);
252 return xdr_decode_post_op_attr(p, fattr);
253}
254
255/*
256 * NFS encode functions
257 */
258
259/*
260 * Encode file handle argument
261 */
262static int
263nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
264{
265 p = xdr_encode_fhandle(p, fh);
266 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
267 return 0;
268}
269
270/*
271 * Encode SETATTR arguments
272 */
273static int
274nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
275{
276 p = xdr_encode_fhandle(p, args->fh);
277 p = xdr_encode_sattr(p, args->sattr);
278 *p++ = htonl(args->guard);
279 if (args->guard)
280 p = xdr_encode_time3(p, &args->guardtime);
281 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
282 return 0;
283}
284
285/*
286 * Encode directory ops argument
287 */
288static int
289nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
290{
291 p = xdr_encode_fhandle(p, args->fh);
292 p = xdr_encode_array(p, args->name, args->len);
293 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
294 return 0;
295}
296
297/*
298 * Encode access() argument
299 */
300static int
301nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
302{
303 p = xdr_encode_fhandle(p, args->fh);
304 *p++ = htonl(args->access);
305 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
306 return 0;
307}
308
309/*
310 * Arguments to a READ call. Since we read data directly into the page
311 * cache, we also set up the reply iovec here so that iov[1] points
312 * exactly to the page we want to fetch.
313 */
314static int
315nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
316{
317 struct rpc_auth *auth = req->rq_task->tk_auth;
318 unsigned int replen;
319 u32 count = args->count;
320
321 p = xdr_encode_fhandle(p, args->fh);
322 p = xdr_encode_hyper(p, args->offset);
323 *p++ = htonl(count);
324 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
325
326 /* Inline the page array */
327 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
328 xdr_inline_pages(&req->rq_rcv_buf, replen,
329 args->pages, args->pgbase, count);
330 return 0;
331}
332
333/*
334 * Write arguments. Splice the buffer to be written into the iovec.
335 */
336static int
337nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
338{
339 struct xdr_buf *sndbuf = &req->rq_snd_buf;
340 u32 count = args->count;
341
342 p = xdr_encode_fhandle(p, args->fh);
343 p = xdr_encode_hyper(p, args->offset);
344 *p++ = htonl(count);
345 *p++ = htonl(args->stable);
346 *p++ = htonl(count);
347 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
348
349 /* Copy the page array */
350 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
351 return 0;
352}
353
354/*
355 * Encode CREATE arguments
356 */
357static int
358nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
359{
360 p = xdr_encode_fhandle(p, args->fh);
361 p = xdr_encode_array(p, args->name, args->len);
362
363 *p++ = htonl(args->createmode);
364 if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
365 *p++ = args->verifier[0];
366 *p++ = args->verifier[1];
367 } else
368 p = xdr_encode_sattr(p, args->sattr);
369
370 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
371 return 0;
372}
373
374/*
375 * Encode MKDIR arguments
376 */
377static int
378nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
379{
380 p = xdr_encode_fhandle(p, args->fh);
381 p = xdr_encode_array(p, args->name, args->len);
382 p = xdr_encode_sattr(p, args->sattr);
383 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
384 return 0;
385}
386
387/*
388 * Encode SYMLINK arguments
389 */
390static int
391nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args)
392{
393 p = xdr_encode_fhandle(p, args->fromfh);
394 p = xdr_encode_array(p, args->fromname, args->fromlen);
395 p = xdr_encode_sattr(p, args->sattr);
396 p = xdr_encode_array(p, args->topath, args->tolen);
397 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
398 return 0;
399}
400
401/*
402 * Encode MKNOD arguments
403 */
404static int
405nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
406{
407 p = xdr_encode_fhandle(p, args->fh);
408 p = xdr_encode_array(p, args->name, args->len);
409 *p++ = htonl(args->type);
410 p = xdr_encode_sattr(p, args->sattr);
411 if (args->type == NF3CHR || args->type == NF3BLK) {
412 *p++ = htonl(MAJOR(args->rdev));
413 *p++ = htonl(MINOR(args->rdev));
414 }
415
416 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
417 return 0;
418}
419
420/*
421 * Encode RENAME arguments
422 */
423static int
424nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
425{
426 p = xdr_encode_fhandle(p, args->fromfh);
427 p = xdr_encode_array(p, args->fromname, args->fromlen);
428 p = xdr_encode_fhandle(p, args->tofh);
429 p = xdr_encode_array(p, args->toname, args->tolen);
430 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
431 return 0;
432}
433
434/*
435 * Encode LINK arguments
436 */
437static int
438nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
439{
440 p = xdr_encode_fhandle(p, args->fromfh);
441 p = xdr_encode_fhandle(p, args->tofh);
442 p = xdr_encode_array(p, args->toname, args->tolen);
443 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
444 return 0;
445}
446
447/*
448 * Encode arguments to readdir call
449 */
450static int
451nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args)
452{
453 struct rpc_auth *auth = req->rq_task->tk_auth;
454 unsigned int replen;
455 u32 count = args->count;
456
457 p = xdr_encode_fhandle(p, args->fh);
458 p = xdr_encode_hyper(p, args->cookie);
459 *p++ = args->verf[0];
460 *p++ = args->verf[1];
461 if (args->plus) {
462 /* readdirplus: need dircount + buffer size.
463 * We just make sure we make dircount big enough */
464 *p++ = htonl(count >> 3);
465 }
466 *p++ = htonl(count);
467 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
468
469 /* Inline the page array */
470 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
471 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
472 return 0;
473}
474
475/*
476 * Decode the result of a readdir call.
477 * We just check for syntactical correctness.
 *
 * The reply header (status, post-op directory attributes, cookie
 * verifier) is decoded from the head iovec; the entry list itself lives
 * in the first receive page and is only walked, never copied.
 *
 * Returns the number of entries found (>= 0), the negated errno for a
 * non-zero NFS status, or -EIO when the header overflows the head iovec
 * or an entry carries an oversized name or file handle.
 *
 * When the entry list is cut short, the two words at the start of the
 * last incomplete entry are overwritten so that nfs3_decode_dirent()
 * stops there: (0, 0) makes it return -EAGAIN, while (0, 1) -- used
 * when not even one entry fitted -- makes it report end of directory.
478 */
479static int
480nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
481{
482 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
483 struct kvec *iov = rcvbuf->head;
484 struct page **page;
485 int hdrlen, recvd;
486 int status, nr;
487 unsigned int len, pglen;
488 u32 *entry, *end, *kaddr;
489
490 status = ntohl(*p++);
491 /* Decode post_op_attrs */
492 p = xdr_decode_post_op_attr(p, res->dir_attr);
493 if (status)
494 return -nfs_stat_to_errno(status);
495 /* Decode verifier cookie */
496 if (res->verf) {
497 res->verf[0] = *p++;
498 res->verf[1] = *p++;
499 } else {
500 p += 2;
501 }
502
503 hdrlen = (u8 *) p - (u8 *) iov->iov_base; /* bytes of header consumed so far */
504 if (iov->iov_len < hdrlen) {
505 printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
506 "length %d > %Zu\n", hdrlen, iov->iov_len);
507 return -errno_NFSERR_IO;
508 } else if (iov->iov_len != hdrlen) {
509 dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
510 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
511 }
512
513 pglen = rcvbuf->page_len;
514 recvd = rcvbuf->len - hdrlen;
515 if (pglen > recvd) /* never scan beyond what was actually received */
516 pglen = recvd;
517 page = rcvbuf->pages;
518 kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
519 end = (u32 *)((char *)p + pglen);
520 entry = p; /* start of the entry currently being checked */
521 for (nr = 0; *p++; nr++) { /* non-zero word: another entry follows */
522 if (p + 3 > end)
523 goto short_pkt;
524 p += 2; /* inode # */
525 len = ntohl(*p++); /* string length */
526 p += XDR_QUADLEN(len) + 2; /* name + cookie */
527 if (len > NFS3_MAXNAMLEN) {
528 printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
529 len);
530 goto err_unmap;
531 }
532
533 if (res->plus) {
534 /* post_op_attr */
535 if (p + 2 > end)
536 goto short_pkt;
537 if (*p++) {
538 p += 21; /* skip the attribute block (cf. NFS3_fattr_sz) */
539 if (p + 1 > end)
540 goto short_pkt;
541 }
542 /* post_op_fh3 */
543 if (*p++) {
544 if (p + 1 > end)
545 goto short_pkt;
546 len = ntohl(*p++);
547 if (len > NFS3_FHSIZE) {
548 printk(KERN_WARNING "NFS: giant filehandle in "
549 "readdir (len %x)!\n", len);
550 goto err_unmap;
551 }
552 p += XDR_QUADLEN(len);
553 }
554 }
555
556 if (p + 2 > end)
557 goto short_pkt;
558 entry = p; /* entry fully inside the buffer; remember where the next one starts */
559 }
560 if (!nr && (entry[0] != 0 || entry[1] == 0))
561 goto short_pkt;
562 out:
563 kunmap_atomic(kaddr, KM_USER0);
564 return nr;
565 short_pkt:
566 entry[0] = entry[1] = 0; /* terminate the list at the last complete entry */
567 /* truncate listing ? */
568 if (!nr) {
569 printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
570 entry[1] = 1; /* nothing usable: mark as end of directory */
571 }
572 goto out;
573err_unmap:
574 nr = -errno_NFSERR_IO;
575 goto out;
576}
577
578u32 *
579nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
580{
581 struct nfs_entry old = *entry;
582
583 if (!*p++) {
584 if (!*p)
585 return ERR_PTR(-EAGAIN);
586 entry->eof = 1;
587 return ERR_PTR(-EBADCOOKIE);
588 }
589
590 p = xdr_decode_hyper(p, &entry->ino);
591 entry->len = ntohl(*p++);
592 entry->name = (const char *) p;
593 p += XDR_QUADLEN(entry->len);
594 entry->prev_cookie = entry->cookie;
595 p = xdr_decode_hyper(p, &entry->cookie);
596
597 if (plus) {
598 entry->fattr->valid = 0;
599 p = xdr_decode_post_op_attr(p, entry->fattr);
600 /* In fact, a post_op_fh3: */
601 if (*p++) {
602 p = xdr_decode_fhandle(p, entry->fh);
603 /* Ugh -- server reply was truncated */
604 if (p == NULL) {
605 dprintk("NFS: FH truncated\n");
606 *entry = old;
607 return ERR_PTR(-EAGAIN);
608 }
609 } else
610 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
611 }
612
613 entry->eof = !p[0] && p[1];
614 return p;
615}
616
617/*
618 * Encode COMMIT arguments
619 */
620static int
621nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
622{
623 p = xdr_encode_fhandle(p, args->fh);
624 p = xdr_encode_hyper(p, args->offset);
625 *p++ = htonl(args->count);
626 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
627 return 0;
628}
629
630/*
631 * NFS XDR decode functions
632 */
633
634/*
635 * Decode attrstat reply.
636 */
637static int
638nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
639{
640 int status;
641
642 if ((status = ntohl(*p++)))
643 return -nfs_stat_to_errno(status);
644 xdr_decode_fattr(p, fattr);
645 return 0;
646}
647
648/*
649 * Decode status+wcc_data reply
650 * SATTR, REMOVE, RMDIR
651 */
652static int
653nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
654{
655 int status;
656
657 if ((status = ntohl(*p++)))
658 status = -nfs_stat_to_errno(status);
659 xdr_decode_wcc_data(p, fattr);
660 return status;
661}
662
663/*
664 * Decode LOOKUP reply
665 */
666static int
667nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
668{
669 int status;
670
671 if ((status = ntohl(*p++))) {
672 status = -nfs_stat_to_errno(status);
673 } else {
674 if (!(p = xdr_decode_fhandle(p, res->fh)))
675 return -errno_NFSERR_IO;
676 p = xdr_decode_post_op_attr(p, res->fattr);
677 }
678 xdr_decode_post_op_attr(p, res->dir_attr);
679 return status;
680}
681
682/*
683 * Decode ACCESS reply
684 */
685static int
686nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
687{
688 int status = ntohl(*p++);
689
690 p = xdr_decode_post_op_attr(p, res->fattr);
691 if (status)
692 return -nfs_stat_to_errno(status);
693 res->access = ntohl(*p++);
694 return 0;
695}
696
697static int
698nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
699{
700 struct rpc_auth *auth = req->rq_task->tk_auth;
701 unsigned int replen;
702
703 p = xdr_encode_fhandle(p, args->fh);
704 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
705
706 /* Inline the page array */
707 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2;
708 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
709 return 0;
710}
711
712/*
713 * Decode READLINK reply
714 */
715static int
716nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
717{
718 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
719 struct kvec *iov = rcvbuf->head;
720 int hdrlen, len, recvd;
721 char *kaddr;
722 int status;
723
724 status = ntohl(*p++);
725 p = xdr_decode_post_op_attr(p, fattr);
726
727 if (status != 0)
728 return -nfs_stat_to_errno(status);
729
730 /* Convert length of symlink */
731 len = ntohl(*p++);
732 if (len >= rcvbuf->page_len || len <= 0) {
733 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
734 return -ENAMETOOLONG;
735 }
736
737 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
738 if (iov->iov_len < hdrlen) {
739 printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
740 "length %d > %Zu\n", hdrlen, iov->iov_len);
741 return -errno_NFSERR_IO;
742 } else if (iov->iov_len != hdrlen) {
743 dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
744 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
745 }
746 recvd = req->rq_rcv_buf.len - hdrlen;
747 if (recvd < len) {
748 printk(KERN_WARNING "NFS: server cheating in readlink reply: "
749 "count %u > recvd %u\n", len, recvd);
750 return -EIO;
751 }
752
753 /* NULL terminate the string we got */
754 kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
755 kaddr[len+rcvbuf->page_base] = '\0';
756 kunmap_atomic(kaddr, KM_USER0);
757 return 0;
758}
759
760/*
761 * Decode READ reply
762 */
763static int
764nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
765{
766 struct kvec *iov = req->rq_rcv_buf.head;
767 int status, count, ocount, recvd, hdrlen;
768
769 status = ntohl(*p++);
770 p = xdr_decode_post_op_attr(p, res->fattr);
771
772 if (status != 0)
773 return -nfs_stat_to_errno(status);
774
775 /* Decode reply could and EOF flag. NFSv3 is somewhat redundant
776 * in that it puts the count both in the res struct and in the
777 * opaque data count. */
778 count = ntohl(*p++);
779 res->eof = ntohl(*p++);
780 ocount = ntohl(*p++);
781
782 if (ocount != count) {
783 printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
784 return -errno_NFSERR_IO;
785 }
786
787 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
788 if (iov->iov_len < hdrlen) {
789 printk(KERN_WARNING "NFS: READ reply header overflowed:"
790 "length %d > %Zu\n", hdrlen, iov->iov_len);
791 return -errno_NFSERR_IO;
792 } else if (iov->iov_len != hdrlen) {
793 dprintk("NFS: READ header is short. iovec will be shifted.\n");
794 xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
795 }
796
797 recvd = req->rq_rcv_buf.len - hdrlen;
798 if (count > recvd) {
799 printk(KERN_WARNING "NFS: server cheating in read reply: "
800 "count %d > recvd %d\n", count, recvd);
801 count = recvd;
802 res->eof = 0;
803 }
804
805 if (count < res->count)
806 res->count = count;
807
808 return count;
809}
810
811/*
812 * Decode WRITE response
813 */
814static int
815nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
816{
817 int status;
818
819 status = ntohl(*p++);
820 p = xdr_decode_wcc_data(p, res->fattr);
821
822 if (status != 0)
823 return -nfs_stat_to_errno(status);
824
825 res->count = ntohl(*p++);
826 res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
827 res->verf->verifier[0] = *p++;
828 res->verf->verifier[1] = *p++;
829
830 return res->count;
831}
832
833/*
834 * Decode a CREATE response
835 */
836static int
837nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
838{
839 int status;
840
841 status = ntohl(*p++);
842 if (status == 0) {
843 if (*p++) {
844 if (!(p = xdr_decode_fhandle(p, res->fh)))
845 return -errno_NFSERR_IO;
846 p = xdr_decode_post_op_attr(p, res->fattr);
847 } else {
848 memset(res->fh, 0, sizeof(*res->fh));
849 /* Do decode post_op_attr but set it to NULL */
850 p = xdr_decode_post_op_attr(p, res->fattr);
851 res->fattr->valid = 0;
852 }
853 } else {
854 status = -nfs_stat_to_errno(status);
855 }
856 p = xdr_decode_wcc_data(p, res->dir_attr);
857 return status;
858}
859
860/*
861 * Decode RENAME reply
862 */
863static int
864nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
865{
866 int status;
867
868 if ((status = ntohl(*p++)) != 0)
869 status = -nfs_stat_to_errno(status);
870 p = xdr_decode_wcc_data(p, res->fromattr);
871 p = xdr_decode_wcc_data(p, res->toattr);
872 return status;
873}
874
875/*
876 * Decode LINK reply
877 */
878static int
879nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
880{
881 int status;
882
883 if ((status = ntohl(*p++)) != 0)
884 status = -nfs_stat_to_errno(status);
885 p = xdr_decode_post_op_attr(p, res->fattr);
886 p = xdr_decode_wcc_data(p, res->dir_attr);
887 return status;
888}
889
890/*
891 * Decode FSSTAT reply
892 */
893static int
894nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
895{
896 int status;
897
898 status = ntohl(*p++);
899
900 p = xdr_decode_post_op_attr(p, res->fattr);
901 if (status != 0)
902 return -nfs_stat_to_errno(status);
903
904 p = xdr_decode_hyper(p, &res->tbytes);
905 p = xdr_decode_hyper(p, &res->fbytes);
906 p = xdr_decode_hyper(p, &res->abytes);
907 p = xdr_decode_hyper(p, &res->tfiles);
908 p = xdr_decode_hyper(p, &res->ffiles);
909 p = xdr_decode_hyper(p, &res->afiles);
910
911 /* ignore invarsec */
912 return 0;
913}
914
915/*
916 * Decode FSINFO reply
917 */
918static int
919nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
920{
921 int status;
922
923 status = ntohl(*p++);
924
925 p = xdr_decode_post_op_attr(p, res->fattr);
926 if (status != 0)
927 return -nfs_stat_to_errno(status);
928
929 res->rtmax = ntohl(*p++);
930 res->rtpref = ntohl(*p++);
931 res->rtmult = ntohl(*p++);
932 res->wtmax = ntohl(*p++);
933 res->wtpref = ntohl(*p++);
934 res->wtmult = ntohl(*p++);
935 res->dtpref = ntohl(*p++);
936 p = xdr_decode_hyper(p, &res->maxfilesize);
937
938 /* ignore time_delta and properties */
939 res->lease_time = 0;
940 return 0;
941}
942
943/*
944 * Decode PATHCONF reply
945 */
946static int
947nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
948{
949 int status;
950
951 status = ntohl(*p++);
952
953 p = xdr_decode_post_op_attr(p, res->fattr);
954 if (status != 0)
955 return -nfs_stat_to_errno(status);
956 res->max_link = ntohl(*p++);
957 res->max_namelen = ntohl(*p++);
958
959 /* ignore remaining fields */
960 return 0;
961}
962
963/*
964 * Decode COMMIT reply
965 */
966static int
967nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
968{
969 int status;
970
971 status = ntohl(*p++);
972 p = xdr_decode_wcc_data(p, res->fattr);
973 if (status != 0)
974 return -nfs_stat_to_errno(status);
975
976 res->verf->verifier[0] = *p++;
977 res->verf->verifier[1] = *p++;
978 return 0;
979}
980
/* Larger of two values; both arguments may be evaluated more than once. */
#ifndef MAX
#define MAX(a, b)	((a) > (b) ? (a) : (b))
#endif
984
/*
 * Build one nfs3_procedures[] slot, indexed by NFS3PROC_<proc>.
 * The buffer size is the larger of the argument and result sizes
 * (NFS3_<type>_sz), shifted left by two.
 */
#define PROC(proc, argtype, restype, timer)				\
[NFS3PROC_##proc] = {							\
	.p_proc   = NFS3PROC_##proc,					\
	.p_encode = (kxdrproc_t) nfs3_xdr_##argtype,			\
	.p_decode = (kxdrproc_t) nfs3_xdr_##restype,			\
	.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,	\
	.p_timer  = timer						\
	}
993
994struct rpc_procinfo nfs3_procedures[] = {
995 PROC(GETATTR, fhandle, attrstat, 1),
996 PROC(SETATTR, sattrargs, wccstat, 0),
997 PROC(LOOKUP, diropargs, lookupres, 2),
998 PROC(ACCESS, accessargs, accessres, 1),
999 PROC(READLINK, readlinkargs, readlinkres, 3),
1000 PROC(READ, readargs, readres, 3),
1001 PROC(WRITE, writeargs, writeres, 4),
1002 PROC(CREATE, createargs, createres, 0),
1003 PROC(MKDIR, mkdirargs, createres, 0),
1004 PROC(SYMLINK, symlinkargs, createres, 0),
1005 PROC(MKNOD, mknodargs, createres, 0),
1006 PROC(REMOVE, diropargs, wccstat, 0),
1007 PROC(RMDIR, diropargs, wccstat, 0),
1008 PROC(RENAME, renameargs, renameres, 0),
1009 PROC(LINK, linkargs, linkres, 0),
1010 PROC(READDIR, readdirargs, readdirres, 3),
1011 PROC(READDIRPLUS, readdirargs, readdirres, 3),
1012 PROC(FSSTAT, fhandle, fsstatres, 0),
1013 PROC(FSINFO, fhandle, fsinfores, 0),
1014 PROC(PATHCONF, fhandle, pathconfres, 0),
1015 PROC(COMMIT, commitargs, commitres, 5),
1016};
1017
1018struct rpc_version nfs_version3 = {
1019 .number = 3,
1020 .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
1021 .procs = nfs3_procedures
1022};
1023
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 000000000000..1d5cb3e80c3e
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,2786 @@
1/*
2 * fs/nfs/nfs4proc.c
3 *
4 * Client-side procedure declarations for NFSv4.
5 *
6 * Copyright (c) 2002 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Kendrick Smith <kmsmith@umich.edu>
10 * Andy Adamson <andros@umich.edu>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <linux/mm.h>
39#include <linux/utsname.h>
40#include <linux/delay.h>
41#include <linux/errno.h>
42#include <linux/string.h>
43#include <linux/sunrpc/clnt.h>
44#include <linux/nfs.h>
45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h>
48#include <linux/smp_lock.h>
49#include <linux/namei.h>
50
51#include "delegation.h"
52
53#define NFSDBG_FACILITY NFSDBG_PROC
54
55#define NFS4_POLL_RETRY_MIN (1*HZ)
56#define NFS4_POLL_RETRY_MAX (15*HZ)
57
58static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
59static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
60static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
61static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
62extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
63extern struct rpc_procinfo nfs4_procedures[];
64
65extern nfs4_stateid zero_stateid;
66
67/* Prevent leaks of NFSv4 errors into userland */
68int nfs4_map_errors(int err)
69{
70 if (err < -1000) {
71 dprintk("%s could not handle NFSv4 error %d\n",
72 __FUNCTION__, -err);
73 return -EIO;
74 }
75 return err;
76}
77
78/*
79 * This is our standard bitmap for GETATTR requests.
80 */
81const u32 nfs4_fattr_bitmap[2] = {
82 FATTR4_WORD0_TYPE
83 | FATTR4_WORD0_CHANGE
84 | FATTR4_WORD0_SIZE
85 | FATTR4_WORD0_FSID
86 | FATTR4_WORD0_FILEID,
87 FATTR4_WORD1_MODE
88 | FATTR4_WORD1_NUMLINKS
89 | FATTR4_WORD1_OWNER
90 | FATTR4_WORD1_OWNER_GROUP
91 | FATTR4_WORD1_RAWDEV
92 | FATTR4_WORD1_SPACE_USED
93 | FATTR4_WORD1_TIME_ACCESS
94 | FATTR4_WORD1_TIME_METADATA
95 | FATTR4_WORD1_TIME_MODIFY
96};
97
98const u32 nfs4_statfs_bitmap[2] = {
99 FATTR4_WORD0_FILES_AVAIL
100 | FATTR4_WORD0_FILES_FREE
101 | FATTR4_WORD0_FILES_TOTAL,
102 FATTR4_WORD1_SPACE_AVAIL
103 | FATTR4_WORD1_SPACE_FREE
104 | FATTR4_WORD1_SPACE_TOTAL
105};
106
107u32 nfs4_pathconf_bitmap[2] = {
108 FATTR4_WORD0_MAXLINK
109 | FATTR4_WORD0_MAXNAME,
110 0
111};
112
113const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
114 | FATTR4_WORD0_MAXREAD
115 | FATTR4_WORD0_MAXWRITE
116 | FATTR4_WORD0_LEASE_TIME,
117 0
118};
119
120static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
121 struct nfs4_readdir_arg *readdir)
122{
123 u32 *start, *p;
124
125 BUG_ON(readdir->count < 80);
126 if (cookie > 2) {
127 readdir->cookie = (cookie > 2) ? cookie : 0;
128 memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
129 return;
130 }
131
132 readdir->cookie = 0;
133 memset(&readdir->verifier, 0, sizeof(readdir->verifier));
134 if (cookie == 2)
135 return;
136
137 /*
138 * NFSv4 servers do not return entries for '.' and '..'
139 * Therefore, we fake these entries here. We let '.'
140 * have cookie 0 and '..' have cookie 1. Note that
141 * when talking to the server, we always send cookie 0
142 * instead of 1 or 2.
143 */
144 start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0);
145
146 if (cookie == 0) {
147 *p++ = xdr_one; /* next */
148 *p++ = xdr_zero; /* cookie, first word */
149 *p++ = xdr_one; /* cookie, second word */
150 *p++ = xdr_one; /* entry len */
151 memcpy(p, ".\0\0\0", 4); /* entry */
152 p++;
153 *p++ = xdr_one; /* bitmap length */
154 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
155 *p++ = htonl(8); /* attribute buffer length */
156 p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
157 }
158
159 *p++ = xdr_one; /* next */
160 *p++ = xdr_zero; /* cookie, first word */
161 *p++ = xdr_two; /* cookie, second word */
162 *p++ = xdr_two; /* entry len */
163 memcpy(p, "..\0\0", 4); /* entry */
164 p++;
165 *p++ = xdr_one; /* bitmap length */
166 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
167 *p++ = htonl(8); /* attribute buffer length */
168 p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
169
170 readdir->pgbase = (char *)p - (char *)start;
171 readdir->count -= readdir->pgbase;
172 kunmap_atomic(start, KM_USER0);
173}
174
175static void
176renew_lease(struct nfs_server *server, unsigned long timestamp)
177{
178 struct nfs4_client *clp = server->nfs4_state;
179 spin_lock(&clp->cl_lock);
180 if (time_before(clp->cl_last_renewal,timestamp))
181 clp->cl_last_renewal = timestamp;
182 spin_unlock(&clp->cl_lock);
183}
184
185static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo)
186{
187 struct nfs_inode *nfsi = NFS_I(inode);
188
189 if (cinfo->before == nfsi->change_attr && cinfo->atomic)
190 nfsi->change_attr = cinfo->after;
191}
192
193static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
194{
195 struct inode *inode = state->inode;
196
197 open_flags &= (FMODE_READ|FMODE_WRITE);
198 /* Protect against nfs4_find_state() */
199 spin_lock(&inode->i_lock);
200 state->state |= open_flags;
201 /* NB! List reordering - see the reclaim code for why. */
202 if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
203 list_move(&state->open_states, &state->owner->so_states);
204 if (open_flags & FMODE_READ)
205 state->nreaders++;
206 memcpy(&state->stateid, stateid, sizeof(state->stateid));
207 spin_unlock(&inode->i_lock);
208}
209
210/*
211 * OPEN_RECLAIM:
212 * reclaim state on the server after a reboot.
213 * Assumes caller is holding the sp->so_sema
214 */
215static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
216{
217 struct inode *inode = state->inode;
218 struct nfs_server *server = NFS_SERVER(inode);
219 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
220 struct nfs_openargs o_arg = {
221 .fh = NFS_FH(inode),
222 .seqid = sp->so_seqid,
223 .id = sp->so_id,
224 .open_flags = state->state,
225 .clientid = server->nfs4_state->cl_clientid,
226 .claim = NFS4_OPEN_CLAIM_PREVIOUS,
227 .bitmask = server->attr_bitmask,
228 };
229 struct nfs_openres o_res = {
230 .server = server, /* Grrr */
231 };
232 struct rpc_message msg = {
233 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
234 .rpc_argp = &o_arg,
235 .rpc_resp = &o_res,
236 .rpc_cred = sp->so_cred,
237 };
238 int status;
239
240 if (delegation != NULL) {
241 if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
242 memcpy(&state->stateid, &delegation->stateid,
243 sizeof(state->stateid));
244 set_bit(NFS_DELEGATED_STATE, &state->flags);
245 return 0;
246 }
247 o_arg.u.delegation_type = delegation->type;
248 }
249 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
250 nfs4_increment_seqid(status, sp);
251 if (status == 0) {
252 memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
253 if (o_res.delegation_type != 0) {
254 nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
255 /* Did the server issue an immediate delegation recall? */
256 if (o_res.do_recall)
257 nfs_async_inode_return_delegation(inode, &o_res.stateid);
258 }
259 }
260 clear_bit(NFS_DELEGATED_STATE, &state->flags);
261 /* Ensure we update the inode attributes */
262 NFS_CACHEINV(inode);
263 return status;
264}
265
266static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
267{
268 struct nfs_server *server = NFS_SERVER(state->inode);
269 struct nfs4_exception exception = { };
270 int err;
271 do {
272 err = _nfs4_open_reclaim(sp, state);
273 switch (err) {
274 case 0:
275 case -NFS4ERR_STALE_CLIENTID:
276 case -NFS4ERR_STALE_STATEID:
277 case -NFS4ERR_EXPIRED:
278 return err;
279 }
280 err = nfs4_handle_exception(server, err, &exception);
281 } while (exception.retry);
282 return err;
283}
284
285static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
286{
287 struct nfs4_state_owner *sp = state->owner;
288 struct inode *inode = dentry->d_inode;
289 struct nfs_server *server = NFS_SERVER(inode);
290 struct dentry *parent = dget_parent(dentry);
291 struct nfs_openargs arg = {
292 .fh = NFS_FH(parent->d_inode),
293 .clientid = server->nfs4_state->cl_clientid,
294 .name = &dentry->d_name,
295 .id = sp->so_id,
296 .server = server,
297 .bitmask = server->attr_bitmask,
298 .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR,
299 };
300 struct nfs_openres res = {
301 .server = server,
302 };
303 struct rpc_message msg = {
304 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
305 .rpc_argp = &arg,
306 .rpc_resp = &res,
307 .rpc_cred = sp->so_cred,
308 };
309 int status = 0;
310
311 down(&sp->so_sema);
312 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
313 goto out;
314 if (state->state == 0)
315 goto out;
316 arg.seqid = sp->so_seqid;
317 arg.open_flags = state->state;
318 memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
319 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
320 nfs4_increment_seqid(status, sp);
321 if (status >= 0) {
322 memcpy(state->stateid.data, res.stateid.data,
323 sizeof(state->stateid.data));
324 clear_bit(NFS_DELEGATED_STATE, &state->flags);
325 }
326out:
327 up(&sp->so_sema);
328 dput(parent);
329 return status;
330}
331
332int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
333{
334 struct nfs4_exception exception = { };
335 struct nfs_server *server = NFS_SERVER(dentry->d_inode);
336 int err;
337 do {
338 err = _nfs4_open_delegation_recall(dentry, state);
339 switch (err) {
340 case 0:
341 return err;
342 case -NFS4ERR_STALE_CLIENTID:
343 case -NFS4ERR_STALE_STATEID:
344 case -NFS4ERR_EXPIRED:
345 /* Don't recall a delegation if it was lost */
346 nfs4_schedule_state_recovery(server->nfs4_state);
347 return err;
348 }
349 err = nfs4_handle_exception(server, err, &exception);
350 } while (exception.retry);
351 return err;
352}
353
354static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
355{
356 struct nfs_open_confirmargs arg = {
357 .fh = fh,
358 .seqid = sp->so_seqid,
359 .stateid = *stateid,
360 };
361 struct nfs_open_confirmres res;
362 struct rpc_message msg = {
363 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
364 .rpc_argp = &arg,
365 .rpc_resp = &res,
366 .rpc_cred = sp->so_cred,
367 };
368 int status;
369
370 status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
371 nfs4_increment_seqid(status, sp);
372 if (status >= 0)
373 memcpy(stateid, &res.stateid, sizeof(*stateid));
374 return status;
375}
376
377static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res)
378{
379 struct nfs_server *server = NFS_SERVER(dir);
380 struct rpc_message msg = {
381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
382 .rpc_argp = o_arg,
383 .rpc_resp = o_res,
384 .rpc_cred = sp->so_cred,
385 };
386 int status;
387
388 /* Update sequence id. The caller must serialize! */
389 o_arg->seqid = sp->so_seqid;
390 o_arg->id = sp->so_id;
391 o_arg->clientid = sp->so_client->cl_clientid;
392
393 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
394 nfs4_increment_seqid(status, sp);
395 if (status != 0)
396 goto out;
397 update_changeattr(dir, &o_res->cinfo);
398 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
399 status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
400 sp, &o_res->stateid);
401 if (status != 0)
402 goto out;
403 }
404 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
405 status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
406out:
407 return status;
408}
409
410static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
411{
412 struct nfs_access_entry cache;
413 int mask = 0;
414 int status;
415
416 if (openflags & FMODE_READ)
417 mask |= MAY_READ;
418 if (openflags & FMODE_WRITE)
419 mask |= MAY_WRITE;
420 status = nfs_access_get_cached(inode, cred, &cache);
421 if (status == 0)
422 goto out;
423
424 /* Be clever: ask server to check for all possible rights */
425 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
426 cache.cred = cred;
427 cache.jiffies = jiffies;
428 status = _nfs4_proc_access(inode, &cache);
429 if (status != 0)
430 return status;
431 nfs_access_add_cache(inode, &cache);
432out:
433 if ((cache.mask & mask) == mask)
434 return 0;
435 return -EACCES;
436}
437
438/*
439 * OPEN_EXPIRED:
440 * reclaim state on the server after a network partition.
441 * Assumes caller holds the appropriate lock
442 */
443static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
444{
445 struct dentry *parent = dget_parent(dentry);
446 struct inode *dir = parent->d_inode;
447 struct inode *inode = state->inode;
448 struct nfs_server *server = NFS_SERVER(dir);
449 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
450 struct nfs_fattr f_attr = {
451 .valid = 0,
452 };
453 struct nfs_openargs o_arg = {
454 .fh = NFS_FH(dir),
455 .open_flags = state->state,
456 .name = &dentry->d_name,
457 .bitmask = server->attr_bitmask,
458 .claim = NFS4_OPEN_CLAIM_NULL,
459 };
460 struct nfs_openres o_res = {
461 .f_attr = &f_attr,
462 .server = server,
463 };
464 int status = 0;
465
466 if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
467 status = _nfs4_do_access(inode, sp->so_cred, state->state);
468 if (status < 0)
469 goto out;
470 memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
471 set_bit(NFS_DELEGATED_STATE, &state->flags);
472 goto out;
473 }
474 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
475 if (status != 0)
476 goto out_nodeleg;
477 /* Check if files differ */
478 if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT))
479 goto out_stale;
480 /* Has the file handle changed? */
481 if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) {
482 /* Verify if the change attributes are the same */
483 if (f_attr.change_attr != NFS_I(inode)->change_attr)
484 goto out_stale;
485 if (nfs_size_to_loff_t(f_attr.size) != inode->i_size)
486 goto out_stale;
487 /* Lets just pretend that this is the same file */
488 nfs_copy_fh(NFS_FH(inode), &o_res.fh);
489 NFS_I(inode)->fileid = f_attr.fileid;
490 }
491 memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
492 if (o_res.delegation_type != 0) {
493 if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM))
494 nfs_inode_set_delegation(inode, sp->so_cred, &o_res);
495 else
496 nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
497 }
498out_nodeleg:
499 clear_bit(NFS_DELEGATED_STATE, &state->flags);
500out:
501 dput(parent);
502 return status;
503out_stale:
504 status = -ESTALE;
505 /* Invalidate the state owner so we don't ever use it again */
506 nfs4_drop_state_owner(sp);
507 d_drop(dentry);
508 /* Should we be trying to close that stateid? */
509 goto out_nodeleg;
510}
511
512static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
513{
514 struct nfs_inode *nfsi = NFS_I(state->inode);
515 struct nfs_open_context *ctx;
516 int status;
517
518 spin_lock(&state->inode->i_lock);
519 list_for_each_entry(ctx, &nfsi->open_files, list) {
520 if (ctx->state != state)
521 continue;
522 get_nfs_open_context(ctx);
523 spin_unlock(&state->inode->i_lock);
524 status = _nfs4_open_expired(sp, state, ctx->dentry);
525 put_nfs_open_context(ctx);
526 return status;
527 }
528 spin_unlock(&state->inode->i_lock);
529 return -ENOENT;
530}
531
532/*
533 * Returns an nfs4_state + an extra reference to the inode
534 */
535static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
536{
537 struct nfs_delegation *delegation;
538 struct nfs_server *server = NFS_SERVER(inode);
539 struct nfs4_client *clp = server->nfs4_state;
540 struct nfs_inode *nfsi = NFS_I(inode);
541 struct nfs4_state_owner *sp = NULL;
542 struct nfs4_state *state = NULL;
543 int open_flags = flags & (FMODE_READ|FMODE_WRITE);
544 int err;
545
546 /* Protect against reboot recovery - NOTE ORDER! */
547 down_read(&clp->cl_sem);
548 /* Protect against delegation recall */
549 down_read(&nfsi->rwsem);
550 delegation = NFS_I(inode)->delegation;
551 err = -ENOENT;
552 if (delegation == NULL || (delegation->type & open_flags) != open_flags)
553 goto out_err;
554 err = -ENOMEM;
555 if (!(sp = nfs4_get_state_owner(server, cred))) {
556 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
557 goto out_err;
558 }
559 down(&sp->so_sema);
560 state = nfs4_get_open_state(inode, sp);
561 if (state == NULL)
562 goto out_err;
563
564 err = -ENOENT;
565 if ((state->state & open_flags) == open_flags) {
566 spin_lock(&inode->i_lock);
567 if (open_flags & FMODE_READ)
568 state->nreaders++;
569 if (open_flags & FMODE_WRITE)
570 state->nwriters++;
571 spin_unlock(&inode->i_lock);
572 goto out_ok;
573 } else if (state->state != 0)
574 goto out_err;
575
576 lock_kernel();
577 err = _nfs4_do_access(inode, cred, open_flags);
578 unlock_kernel();
579 if (err != 0)
580 goto out_err;
581 set_bit(NFS_DELEGATED_STATE, &state->flags);
582 update_open_stateid(state, &delegation->stateid, open_flags);
583out_ok:
584 up(&sp->so_sema);
585 nfs4_put_state_owner(sp);
586 up_read(&nfsi->rwsem);
587 up_read(&clp->cl_sem);
588 igrab(inode);
589 *res = state;
590 return 0;
591out_err:
592 if (sp != NULL) {
593 if (state != NULL)
594 nfs4_put_open_state(state);
595 up(&sp->so_sema);
596 nfs4_put_state_owner(sp);
597 }
598 up_read(&nfsi->rwsem);
599 up_read(&clp->cl_sem);
600 return err;
601}
602
603static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
604{
605 struct nfs4_exception exception = { };
606 struct nfs4_state *res;
607 int err;
608
609 do {
610 err = _nfs4_open_delegated(inode, flags, cred, &res);
611 if (err == 0)
612 break;
613 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
614 err, &exception));
615 } while (exception.retry);
616 return res;
617}
618
619/*
620 * Returns an nfs4_state + a referenced inode
621 */
622static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
623{
624 struct nfs4_state_owner *sp;
625 struct nfs4_state *state = NULL;
626 struct nfs_server *server = NFS_SERVER(dir);
627 struct nfs4_client *clp = server->nfs4_state;
628 struct inode *inode = NULL;
629 int status;
630 struct nfs_fattr f_attr = {
631 .valid = 0,
632 };
633 struct nfs_openargs o_arg = {
634 .fh = NFS_FH(dir),
635 .open_flags = flags,
636 .name = &dentry->d_name,
637 .server = server,
638 .bitmask = server->attr_bitmask,
639 .claim = NFS4_OPEN_CLAIM_NULL,
640 };
641 struct nfs_openres o_res = {
642 .f_attr = &f_attr,
643 .server = server,
644 };
645
646 /* Protect against reboot recovery conflicts */
647 down_read(&clp->cl_sem);
648 status = -ENOMEM;
649 if (!(sp = nfs4_get_state_owner(server, cred))) {
650 dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
651 goto out_err;
652 }
653 if (flags & O_EXCL) {
654 u32 *p = (u32 *) o_arg.u.verifier.data;
655 p[0] = jiffies;
656 p[1] = current->pid;
657 } else
658 o_arg.u.attrs = sattr;
659 /* Serialization for the sequence id */
660 down(&sp->so_sema);
661
662 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
663 if (status != 0)
664 goto out_err;
665
666 status = -ENOMEM;
667 inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr);
668 if (!inode)
669 goto out_err;
670 state = nfs4_get_open_state(inode, sp);
671 if (!state)
672 goto out_err;
673 update_open_stateid(state, &o_res.stateid, flags);
674 if (o_res.delegation_type != 0)
675 nfs_inode_set_delegation(inode, cred, &o_res);
676 up(&sp->so_sema);
677 nfs4_put_state_owner(sp);
678 up_read(&clp->cl_sem);
679 *res = state;
680 return 0;
681out_err:
682 if (sp != NULL) {
683 if (state != NULL)
684 nfs4_put_open_state(state);
685 up(&sp->so_sema);
686 nfs4_put_state_owner(sp);
687 }
688 /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
689 up_read(&clp->cl_sem);
690 if (inode != NULL)
691 iput(inode);
692 *res = NULL;
693 return status;
694}
695
696
697static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
698{
699 struct nfs4_exception exception = { };
700 struct nfs4_state *res;
701 int status;
702
703 do {
704 status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
705 if (status == 0)
706 break;
707 /* NOTE: BAD_SEQID means the server and client disagree about the
708 * book-keeping w.r.t. state-changing operations
709 * (OPEN/CLOSE/LOCK/LOCKU...)
710 * It is actually a sign of a bug on the client or on the server.
711 *
712 * If we receive a BAD_SEQID error in the particular case of
713 * doing an OPEN, we assume that nfs4_increment_seqid() will
714 * have unhashed the old state_owner for us, and that we can
715 * therefore safely retry using a new one. We should still warn
716 * the user though...
717 */
718 if (status == -NFS4ERR_BAD_SEQID) {
719 printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
720 exception.retry = 1;
721 continue;
722 }
723 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
724 status, &exception));
725 } while (exception.retry);
726 return res;
727}
728
729static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
730 struct nfs_fh *fhandle, struct iattr *sattr,
731 struct nfs4_state *state)
732{
733 struct nfs_setattrargs arg = {
734 .fh = fhandle,
735 .iap = sattr,
736 .server = server,
737 .bitmask = server->attr_bitmask,
738 };
739 struct nfs_setattrres res = {
740 .fattr = fattr,
741 .server = server,
742 };
743 struct rpc_message msg = {
744 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
745 .rpc_argp = &arg,
746 .rpc_resp = &res,
747 };
748
749 fattr->valid = 0;
750
751 if (state != NULL)
752 msg.rpc_cred = state->owner->so_cred;
753 if (sattr->ia_valid & ATTR_SIZE)
754 nfs4_copy_stateid(&arg.stateid, state, NULL);
755 else
756 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
757
758 return rpc_call_sync(server->client, &msg, 0);
759}
760
761static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
762 struct nfs_fh *fhandle, struct iattr *sattr,
763 struct nfs4_state *state)
764{
765 struct nfs4_exception exception = { };
766 int err;
767 do {
768 err = nfs4_handle_exception(server,
769 _nfs4_do_setattr(server, fattr, fhandle, sattr,
770 state),
771 &exception);
772 } while (exception.retry);
773 return err;
774}
775
/*
 * Context carried by an asynchronous CLOSE / OPEN_DOWNGRADE call.
 * It is allocated by nfs4_do_close() and freed by nfs4_close_done().
 */
struct nfs4_closedata {
	struct inode *inode;		/* inode the request refers to */
	struct nfs4_state *state;	/* open state being closed/downgraded */
	struct nfs_closeargs arg;	/* RPC arguments */
	struct nfs_closeres res;	/* RPC results */
};
782
783static void nfs4_close_done(struct rpc_task *task)
784{
785 struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
786 struct nfs4_state *state = calldata->state;
787 struct nfs4_state_owner *sp = state->owner;
788 struct nfs_server *server = NFS_SERVER(calldata->inode);
789
790 /* hmm. we are done with the inode, and in the process of freeing
791 * the state_owner. we keep this around to process errors
792 */
793 nfs4_increment_seqid(task->tk_status, sp);
794 switch (task->tk_status) {
795 case 0:
796 memcpy(&state->stateid, &calldata->res.stateid,
797 sizeof(state->stateid));
798 break;
799 case -NFS4ERR_STALE_STATEID:
800 case -NFS4ERR_EXPIRED:
801 state->state = calldata->arg.open_flags;
802 nfs4_schedule_state_recovery(server->nfs4_state);
803 break;
804 default:
805 if (nfs4_async_handle_error(task, server) == -EAGAIN) {
806 rpc_restart_call(task);
807 return;
808 }
809 }
810 state->state = calldata->arg.open_flags;
811 nfs4_put_open_state(state);
812 up(&sp->so_sema);
813 nfs4_put_state_owner(sp);
814 up_read(&server->nfs4_state->cl_sem);
815 kfree(calldata);
816}
817
818static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
819{
820 struct rpc_message msg = {
821 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
822 .rpc_argp = &calldata->arg,
823 .rpc_resp = &calldata->res,
824 .rpc_cred = calldata->state->owner->so_cred,
825 };
826 if (calldata->arg.open_flags != 0)
827 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
828 return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
829}
830
831/*
832 * It is possible for data to be read/written from a mem-mapped file
833 * after the sys_close call (which hits the vfs layer as a flush).
834 * This means that we can't safely call nfsv4 close on a file until
835 * the inode is cleared. This in turn means that we are not good
836 * NFSv4 citizens - we do not indicate to the server to update the file's
837 * share state even when we are done with one of the three share
838 * stateid's in the inode.
839 *
840 * NOTE: Caller must be holding the sp->so_owner semaphore!
841 */
842int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
843{
844 struct nfs4_closedata *calldata;
845 int status;
846
847 /* Tell caller we're done */
848 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
849 state->state = mode;
850 return 0;
851 }
852 calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
853 if (calldata == NULL)
854 return -ENOMEM;
855 calldata->inode = inode;
856 calldata->state = state;
857 calldata->arg.fh = NFS_FH(inode);
858 /* Serialization for the sequence id */
859 calldata->arg.seqid = state->owner->so_seqid;
860 calldata->arg.open_flags = mode;
861 memcpy(&calldata->arg.stateid, &state->stateid,
862 sizeof(calldata->arg.stateid));
863 status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
864 /*
865 * Return -EINPROGRESS on success in order to indicate to the
866 * caller that an asynchronous RPC call has been launched, and
867 * that it will release the semaphores on completion.
868 */
869 return (status == 0) ? -EINPROGRESS : status;
870}
871
872struct inode *
873nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
874{
875 struct iattr attr;
876 struct rpc_cred *cred;
877 struct nfs4_state *state;
878
879 if (nd->flags & LOOKUP_CREATE) {
880 attr.ia_mode = nd->intent.open.create_mode;
881 attr.ia_valid = ATTR_MODE;
882 if (!IS_POSIXACL(dir))
883 attr.ia_mode &= ~current->fs->umask;
884 } else {
885 attr.ia_valid = 0;
886 BUG_ON(nd->intent.open.flags & O_CREAT);
887 }
888
889 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
890 if (IS_ERR(cred))
891 return (struct inode *)cred;
892 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
893 put_rpccred(cred);
894 if (IS_ERR(state))
895 return (struct inode *)state;
896 return state->inode;
897}
898
899int
900nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
901{
902 struct rpc_cred *cred;
903 struct nfs4_state *state;
904 struct inode *inode;
905
906 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
907 if (IS_ERR(cred))
908 return PTR_ERR(cred);
909 state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
910 if (IS_ERR(state))
911 state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
912 put_rpccred(cred);
913 if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
914 return 1;
915 if (IS_ERR(state))
916 return 0;
917 inode = state->inode;
918 if (inode == dentry->d_inode) {
919 iput(inode);
920 return 1;
921 }
922 d_drop(dentry);
923 nfs4_close_state(state, openflags);
924 iput(inode);
925 return 0;
926}
927
928
929static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
930{
931 struct nfs4_server_caps_res res = {};
932 struct rpc_message msg = {
933 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
934 .rpc_argp = fhandle,
935 .rpc_resp = &res,
936 };
937 int status;
938
939 status = rpc_call_sync(server->client, &msg, 0);
940 if (status == 0) {
941 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
942 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
943 server->caps |= NFS_CAP_ACLS;
944 if (res.has_links != 0)
945 server->caps |= NFS_CAP_HARDLINKS;
946 if (res.has_symlinks != 0)
947 server->caps |= NFS_CAP_SYMLINKS;
948 server->acl_bitmask = res.acl_bitmask;
949 }
950 return status;
951}
952
953static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
954{
955 struct nfs4_exception exception = { };
956 int err;
957 do {
958 err = nfs4_handle_exception(server,
959 _nfs4_server_capabilities(server, fhandle),
960 &exception);
961 } while (exception.retry);
962 return err;
963}
964
965static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
966 struct nfs_fsinfo *info)
967{
968 struct nfs_fattr * fattr = info->fattr;
969 struct nfs4_lookup_root_arg args = {
970 .bitmask = nfs4_fattr_bitmap,
971 };
972 struct nfs4_lookup_res res = {
973 .server = server,
974 .fattr = fattr,
975 .fh = fhandle,
976 };
977 struct rpc_message msg = {
978 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT],
979 .rpc_argp = &args,
980 .rpc_resp = &res,
981 };
982 fattr->valid = 0;
983 return rpc_call_sync(server->client, &msg, 0);
984}
985
986static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
987 struct nfs_fsinfo *info)
988{
989 struct nfs4_exception exception = { };
990 int err;
991 do {
992 err = nfs4_handle_exception(server,
993 _nfs4_lookup_root(server, fhandle, info),
994 &exception);
995 } while (exception.retry);
996 return err;
997}
998
999static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
1000 struct nfs_fsinfo *info)
1001{
1002 struct nfs_fattr * fattr = info->fattr;
1003 unsigned char * p;
1004 struct qstr q;
1005 struct nfs4_lookup_arg args = {
1006 .dir_fh = fhandle,
1007 .name = &q,
1008 .bitmask = nfs4_fattr_bitmap,
1009 };
1010 struct nfs4_lookup_res res = {
1011 .server = server,
1012 .fattr = fattr,
1013 .fh = fhandle,
1014 };
1015 struct rpc_message msg = {
1016 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1017 .rpc_argp = &args,
1018 .rpc_resp = &res,
1019 };
1020 int status;
1021
1022 /*
1023 * Now we do a separate LOOKUP for each component of the mount path.
1024 * The LOOKUPs are done separately so that we can conveniently
1025 * catch an ERR_WRONGSEC if it occurs along the way...
1026 */
1027 status = nfs4_lookup_root(server, fhandle, info);
1028 if (status)
1029 goto out;
1030
1031 p = server->mnt_path;
1032 for (;;) {
1033 struct nfs4_exception exception = { };
1034
1035 while (*p == '/')
1036 p++;
1037 if (!*p)
1038 break;
1039 q.name = p;
1040 while (*p && (*p != '/'))
1041 p++;
1042 q.len = p - q.name;
1043
1044 do {
1045 fattr->valid = 0;
1046 status = nfs4_handle_exception(server,
1047 rpc_call_sync(server->client, &msg, 0),
1048 &exception);
1049 } while (exception.retry);
1050 if (status == 0)
1051 continue;
1052 if (status == -ENOENT) {
1053 printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
1054 printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
1055 }
1056 break;
1057 }
1058 if (status == 0)
1059 status = nfs4_server_capabilities(server, fhandle);
1060 if (status == 0)
1061 status = nfs4_do_fsinfo(server, fhandle, info);
1062out:
1063 return status;
1064}
1065
1066static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1067{
1068 struct nfs4_getattr_arg args = {
1069 .fh = fhandle,
1070 .bitmask = server->attr_bitmask,
1071 };
1072 struct nfs4_getattr_res res = {
1073 .fattr = fattr,
1074 .server = server,
1075 };
1076 struct rpc_message msg = {
1077 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
1078 .rpc_argp = &args,
1079 .rpc_resp = &res,
1080 };
1081
1082 fattr->valid = 0;
1083 return rpc_call_sync(server->client, &msg, 0);
1084}
1085
1086static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1087{
1088 struct nfs4_exception exception = { };
1089 int err;
1090 do {
1091 err = nfs4_handle_exception(server,
1092 _nfs4_proc_getattr(server, fhandle, fattr),
1093 &exception);
1094 } while (exception.retry);
1095 return err;
1096}
1097
1098/*
1099 * The file is not closed if it is opened due to the a request to change
1100 * the size of the file. The open call will not be needed once the
1101 * VFS layer lookup-intents are implemented.
1102 *
1103 * Close is called when the inode is destroyed.
1104 * If we haven't opened the file for O_WRONLY, we
1105 * need to in the size_change case to obtain a stateid.
1106 *
1107 * Got race?
1108 * Because OPEN is always done by name in nfsv4, it is
1109 * possible that we opened a different file by the same
1110 * name. We can recognize this race condition, but we
1111 * can't do anything about it besides returning an error.
1112 *
1113 * This will be fixed with VFS changes (lookup-intent).
1114 */
1115static int
1116nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1117 struct iattr *sattr)
1118{
1119 struct inode * inode = dentry->d_inode;
1120 int size_change = sattr->ia_valid & ATTR_SIZE;
1121 struct nfs4_state *state = NULL;
1122 int need_iput = 0;
1123 int status;
1124
1125 fattr->valid = 0;
1126
1127 if (size_change) {
1128 struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
1129 if (IS_ERR(cred))
1130 return PTR_ERR(cred);
1131 state = nfs4_find_state(inode, cred, FMODE_WRITE);
1132 if (state == NULL) {
1133 state = nfs4_open_delegated(dentry->d_inode,
1134 FMODE_WRITE, cred);
1135 if (IS_ERR(state))
1136 state = nfs4_do_open(dentry->d_parent->d_inode,
1137 dentry, FMODE_WRITE,
1138 NULL, cred);
1139 need_iput = 1;
1140 }
1141 put_rpccred(cred);
1142 if (IS_ERR(state))
1143 return PTR_ERR(state);
1144
1145 if (state->inode != inode) {
1146 printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
1147 status = -EIO;
1148 goto out;
1149 }
1150 }
1151 status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
1152 NFS_FH(inode), sattr, state);
1153out:
1154 if (state) {
1155 inode = state->inode;
1156 nfs4_close_state(state, FMODE_WRITE);
1157 if (need_iput)
1158 iput(inode);
1159 }
1160 return status;
1161}
1162
1163static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1164 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1165{
1166 int status;
1167 struct nfs_server *server = NFS_SERVER(dir);
1168 struct nfs4_lookup_arg args = {
1169 .bitmask = server->attr_bitmask,
1170 .dir_fh = NFS_FH(dir),
1171 .name = name,
1172 };
1173 struct nfs4_lookup_res res = {
1174 .server = server,
1175 .fattr = fattr,
1176 .fh = fhandle,
1177 };
1178 struct rpc_message msg = {
1179 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1180 .rpc_argp = &args,
1181 .rpc_resp = &res,
1182 };
1183
1184 fattr->valid = 0;
1185
1186 dprintk("NFS call lookup %s\n", name->name);
1187 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1188 dprintk("NFS reply lookup: %d\n", status);
1189 return status;
1190}
1191
1192static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1193{
1194 struct nfs4_exception exception = { };
1195 int err;
1196 do {
1197 err = nfs4_handle_exception(NFS_SERVER(dir),
1198 _nfs4_proc_lookup(dir, name, fhandle, fattr),
1199 &exception);
1200 } while (exception.retry);
1201 return err;
1202}
1203
1204static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
1205{
1206 struct nfs4_accessargs args = {
1207 .fh = NFS_FH(inode),
1208 };
1209 struct nfs4_accessres res = { 0 };
1210 struct rpc_message msg = {
1211 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
1212 .rpc_argp = &args,
1213 .rpc_resp = &res,
1214 .rpc_cred = entry->cred,
1215 };
1216 int mode = entry->mask;
1217 int status;
1218
1219 /*
1220 * Determine which access bits we want to ask for...
1221 */
1222 if (mode & MAY_READ)
1223 args.access |= NFS4_ACCESS_READ;
1224 if (S_ISDIR(inode->i_mode)) {
1225 if (mode & MAY_WRITE)
1226 args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
1227 if (mode & MAY_EXEC)
1228 args.access |= NFS4_ACCESS_LOOKUP;
1229 } else {
1230 if (mode & MAY_WRITE)
1231 args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
1232 if (mode & MAY_EXEC)
1233 args.access |= NFS4_ACCESS_EXECUTE;
1234 }
1235 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
1236 if (!status) {
1237 entry->mask = 0;
1238 if (res.access & NFS4_ACCESS_READ)
1239 entry->mask |= MAY_READ;
1240 if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
1241 entry->mask |= MAY_WRITE;
1242 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
1243 entry->mask |= MAY_EXEC;
1244 }
1245 return status;
1246}
1247
1248static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
1249{
1250 struct nfs4_exception exception = { };
1251 int err;
1252 do {
1253 err = nfs4_handle_exception(NFS_SERVER(inode),
1254 _nfs4_proc_access(inode, entry),
1255 &exception);
1256 } while (exception.retry);
1257 return err;
1258}
1259
1260/*
1261 * TODO: For the time being, we don't try to get any attributes
1262 * along with any of the zero-copy operations READ, READDIR,
1263 * READLINK, WRITE.
1264 *
1265 * In the case of the first three, we want to put the GETATTR
1266 * after the read-type operation -- this is because it is hard
1267 * to predict the length of a GETATTR response in v4, and thus
1268 * align the READ data correctly. This means that the GETATTR
1269 * may end up partially falling into the page cache, and we should
1270 * shift it into the 'tail' of the xdr_buf before processing.
1271 * To do this efficiently, we need to know the total length
1272 * of data received, which doesn't seem to be available outside
1273 * of the RPC layer.
1274 *
1275 * In the case of WRITE, we also want to put the GETATTR after
1276 * the operation -- in this case because we want to make sure
1277 * we get the post-operation mtime and size. This means that
1278 * we can't use xdr_encode_pages() as written: we need a variant
1279 * of it which would leave room in the 'tail' iovec.
1280 *
1281 * Both of these changes to the XDR layer would in fact be quite
1282 * minor, but I decided to leave them for a subsequent patch.
1283 */
1284static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
1285 unsigned int pgbase, unsigned int pglen)
1286{
1287 struct nfs4_readlink args = {
1288 .fh = NFS_FH(inode),
1289 .pgbase = pgbase,
1290 .pglen = pglen,
1291 .pages = &page,
1292 };
1293 struct rpc_message msg = {
1294 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
1295 .rpc_argp = &args,
1296 .rpc_resp = NULL,
1297 };
1298
1299 return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
1300}
1301
1302static int nfs4_proc_readlink(struct inode *inode, struct page *page,
1303 unsigned int pgbase, unsigned int pglen)
1304{
1305 struct nfs4_exception exception = { };
1306 int err;
1307 do {
1308 err = nfs4_handle_exception(NFS_SERVER(inode),
1309 _nfs4_proc_readlink(inode, page, pgbase, pglen),
1310 &exception);
1311 } while (exception.retry);
1312 return err;
1313}
1314
1315static int _nfs4_proc_read(struct nfs_read_data *rdata)
1316{
1317 int flags = rdata->flags;
1318 struct inode *inode = rdata->inode;
1319 struct nfs_fattr *fattr = rdata->res.fattr;
1320 struct nfs_server *server = NFS_SERVER(inode);
1321 struct rpc_message msg = {
1322 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
1323 .rpc_argp = &rdata->args,
1324 .rpc_resp = &rdata->res,
1325 .rpc_cred = rdata->cred,
1326 };
1327 unsigned long timestamp = jiffies;
1328 int status;
1329
1330 dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
1331 (long long) rdata->args.offset);
1332
1333 fattr->valid = 0;
1334 status = rpc_call_sync(server->client, &msg, flags);
1335 if (!status)
1336 renew_lease(server, timestamp);
1337 dprintk("NFS reply read: %d\n", status);
1338 return status;
1339}
1340
1341static int nfs4_proc_read(struct nfs_read_data *rdata)
1342{
1343 struct nfs4_exception exception = { };
1344 int err;
1345 do {
1346 err = nfs4_handle_exception(NFS_SERVER(rdata->inode),
1347 _nfs4_proc_read(rdata),
1348 &exception);
1349 } while (exception.retry);
1350 return err;
1351}
1352
1353static int _nfs4_proc_write(struct nfs_write_data *wdata)
1354{
1355 int rpcflags = wdata->flags;
1356 struct inode *inode = wdata->inode;
1357 struct nfs_fattr *fattr = wdata->res.fattr;
1358 struct nfs_server *server = NFS_SERVER(inode);
1359 struct rpc_message msg = {
1360 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
1361 .rpc_argp = &wdata->args,
1362 .rpc_resp = &wdata->res,
1363 .rpc_cred = wdata->cred,
1364 };
1365 int status;
1366
1367 dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
1368 (long long) wdata->args.offset);
1369
1370 fattr->valid = 0;
1371 status = rpc_call_sync(server->client, &msg, rpcflags);
1372 dprintk("NFS reply write: %d\n", status);
1373 return status;
1374}
1375
1376static int nfs4_proc_write(struct nfs_write_data *wdata)
1377{
1378 struct nfs4_exception exception = { };
1379 int err;
1380 do {
1381 err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
1382 _nfs4_proc_write(wdata),
1383 &exception);
1384 } while (exception.retry);
1385 return err;
1386}
1387
1388static int _nfs4_proc_commit(struct nfs_write_data *cdata)
1389{
1390 struct inode *inode = cdata->inode;
1391 struct nfs_fattr *fattr = cdata->res.fattr;
1392 struct nfs_server *server = NFS_SERVER(inode);
1393 struct rpc_message msg = {
1394 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
1395 .rpc_argp = &cdata->args,
1396 .rpc_resp = &cdata->res,
1397 .rpc_cred = cdata->cred,
1398 };
1399 int status;
1400
1401 dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
1402 (long long) cdata->args.offset);
1403
1404 fattr->valid = 0;
1405 status = rpc_call_sync(server->client, &msg, 0);
1406 dprintk("NFS reply commit: %d\n", status);
1407 return status;
1408}
1409
1410static int nfs4_proc_commit(struct nfs_write_data *cdata)
1411{
1412 struct nfs4_exception exception = { };
1413 int err;
1414 do {
1415 err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
1416 _nfs4_proc_commit(cdata),
1417 &exception);
1418 } while (exception.retry);
1419 return err;
1420}
1421
1422/*
1423 * Got race?
1424 * We will need to arrange for the VFS layer to provide an atomic open.
1425 * Until then, this create/open method is prone to inefficiency and race
1426 * conditions due to the lookup, create, and open VFS calls from sys_open()
1427 * placed on the wire.
1428 *
1429 * Given the above sorry state of affairs, I'm simply sending an OPEN.
1430 * The file will be opened again in the subsequent VFS open call
1431 * (nfs4_proc_file_open).
1432 *
1433 * The open for read will just hang around to be used by any process that
1434 * opens the file O_RDONLY. This will all be resolved with the VFS changes.
1435 */
1436
1437static int
1438nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1439 int flags)
1440{
1441 struct nfs4_state *state;
1442 struct rpc_cred *cred;
1443 int status = 0;
1444
1445 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
1446 if (IS_ERR(cred)) {
1447 status = PTR_ERR(cred);
1448 goto out;
1449 }
1450 state = nfs4_do_open(dir, dentry, flags, sattr, cred);
1451 put_rpccred(cred);
1452 if (IS_ERR(state)) {
1453 status = PTR_ERR(state);
1454 goto out;
1455 }
1456 d_instantiate(dentry, state->inode);
1457 if (flags & O_EXCL) {
1458 struct nfs_fattr fattr;
1459 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
1460 NFS_FH(state->inode), sattr, state);
1461 if (status == 0)
1462 goto out;
1463 } else if (flags != 0)
1464 goto out;
1465 nfs4_close_state(state, flags);
1466out:
1467 return status;
1468}
1469
1470static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
1471{
1472 struct nfs4_remove_arg args = {
1473 .fh = NFS_FH(dir),
1474 .name = name,
1475 };
1476 struct nfs4_change_info res;
1477 struct rpc_message msg = {
1478 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
1479 .rpc_argp = &args,
1480 .rpc_resp = &res,
1481 };
1482 int status;
1483
1484 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1485 if (status == 0)
1486 update_changeattr(dir, &res);
1487 return status;
1488}
1489
1490static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
1491{
1492 struct nfs4_exception exception = { };
1493 int err;
1494 do {
1495 err = nfs4_handle_exception(NFS_SERVER(dir),
1496 _nfs4_proc_remove(dir, name),
1497 &exception);
1498 } while (exception.retry);
1499 return err;
1500}
1501
/*
 * Argument/result pair for a REMOVE message prepared by
 * nfs4_proc_unlink_setup(); it is freed by nfs4_proc_unlink_done().
 */
struct unlink_desc {
	struct nfs4_remove_arg args;	/* REMOVE arguments (directory fh, name) */
	struct nfs4_change_info res;	/* change info returned for the directory */
};
1506
1507static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
1508 struct qstr *name)
1509{
1510 struct unlink_desc *up;
1511
1512 up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
1513 if (!up)
1514 return -ENOMEM;
1515
1516 up->args.fh = NFS_FH(dir->d_inode);
1517 up->args.name = name;
1518
1519 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
1520 msg->rpc_argp = &up->args;
1521 msg->rpc_resp = &up->res;
1522 return 0;
1523}
1524
1525static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
1526{
1527 struct rpc_message *msg = &task->tk_msg;
1528 struct unlink_desc *up;
1529
1530 if (msg->rpc_resp != NULL) {
1531 up = container_of(msg->rpc_resp, struct unlink_desc, res);
1532 update_changeattr(dir->d_inode, &up->res);
1533 kfree(up);
1534 msg->rpc_resp = NULL;
1535 msg->rpc_argp = NULL;
1536 }
1537 return 0;
1538}
1539
1540static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
1541 struct inode *new_dir, struct qstr *new_name)
1542{
1543 struct nfs4_rename_arg arg = {
1544 .old_dir = NFS_FH(old_dir),
1545 .new_dir = NFS_FH(new_dir),
1546 .old_name = old_name,
1547 .new_name = new_name,
1548 };
1549 struct nfs4_rename_res res = { };
1550 struct rpc_message msg = {
1551 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
1552 .rpc_argp = &arg,
1553 .rpc_resp = &res,
1554 };
1555 int status;
1556
1557 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
1558
1559 if (!status) {
1560 update_changeattr(old_dir, &res.old_cinfo);
1561 update_changeattr(new_dir, &res.new_cinfo);
1562 }
1563 return status;
1564}
1565
1566static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
1567 struct inode *new_dir, struct qstr *new_name)
1568{
1569 struct nfs4_exception exception = { };
1570 int err;
1571 do {
1572 err = nfs4_handle_exception(NFS_SERVER(old_dir),
1573 _nfs4_proc_rename(old_dir, old_name,
1574 new_dir, new_name),
1575 &exception);
1576 } while (exception.retry);
1577 return err;
1578}
1579
1580static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
1581{
1582 struct nfs4_link_arg arg = {
1583 .fh = NFS_FH(inode),
1584 .dir_fh = NFS_FH(dir),
1585 .name = name,
1586 };
1587 struct nfs4_change_info cinfo = { };
1588 struct rpc_message msg = {
1589 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
1590 .rpc_argp = &arg,
1591 .rpc_resp = &cinfo,
1592 };
1593 int status;
1594
1595 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
1596 if (!status)
1597 update_changeattr(dir, &cinfo);
1598
1599 return status;
1600}
1601
1602static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
1603{
1604 struct nfs4_exception exception = { };
1605 int err;
1606 do {
1607 err = nfs4_handle_exception(NFS_SERVER(inode),
1608 _nfs4_proc_link(inode, dir, name),
1609 &exception);
1610 } while (exception.retry);
1611 return err;
1612}
1613
1614static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
1615 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
1616 struct nfs_fattr *fattr)
1617{
1618 struct nfs_server *server = NFS_SERVER(dir);
1619 struct nfs4_create_arg arg = {
1620 .dir_fh = NFS_FH(dir),
1621 .server = server,
1622 .name = name,
1623 .attrs = sattr,
1624 .ftype = NF4LNK,
1625 .bitmask = server->attr_bitmask,
1626 };
1627 struct nfs4_create_res res = {
1628 .server = server,
1629 .fh = fhandle,
1630 .fattr = fattr,
1631 };
1632 struct rpc_message msg = {
1633 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
1634 .rpc_argp = &arg,
1635 .rpc_resp = &res,
1636 };
1637 int status;
1638
1639 if (path->len > NFS4_MAXPATHLEN)
1640 return -ENAMETOOLONG;
1641 arg.u.symlink = path;
1642 fattr->valid = 0;
1643
1644 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1645 if (!status)
1646 update_changeattr(dir, &res.dir_cinfo);
1647 return status;
1648}
1649
1650static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
1651 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
1652 struct nfs_fattr *fattr)
1653{
1654 struct nfs4_exception exception = { };
1655 int err;
1656 do {
1657 err = nfs4_handle_exception(NFS_SERVER(dir),
1658 _nfs4_proc_symlink(dir, name, path, sattr,
1659 fhandle, fattr),
1660 &exception);
1661 } while (exception.retry);
1662 return err;
1663}
1664
1665static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
1666 struct iattr *sattr)
1667{
1668 struct nfs_server *server = NFS_SERVER(dir);
1669 struct nfs_fh fhandle;
1670 struct nfs_fattr fattr;
1671 struct nfs4_create_arg arg = {
1672 .dir_fh = NFS_FH(dir),
1673 .server = server,
1674 .name = &dentry->d_name,
1675 .attrs = sattr,
1676 .ftype = NF4DIR,
1677 .bitmask = server->attr_bitmask,
1678 };
1679 struct nfs4_create_res res = {
1680 .server = server,
1681 .fh = &fhandle,
1682 .fattr = &fattr,
1683 };
1684 struct rpc_message msg = {
1685 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
1686 .rpc_argp = &arg,
1687 .rpc_resp = &res,
1688 };
1689 int status;
1690
1691 fattr.valid = 0;
1692
1693 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1694 if (!status) {
1695 update_changeattr(dir, &res.dir_cinfo);
1696 status = nfs_instantiate(dentry, &fhandle, &fattr);
1697 }
1698 return status;
1699}
1700
1701static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
1702 struct iattr *sattr)
1703{
1704 struct nfs4_exception exception = { };
1705 int err;
1706 do {
1707 err = nfs4_handle_exception(NFS_SERVER(dir),
1708 _nfs4_proc_mkdir(dir, dentry, sattr),
1709 &exception);
1710 } while (exception.retry);
1711 return err;
1712}
1713
1714static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
1715 u64 cookie, struct page *page, unsigned int count, int plus)
1716{
1717 struct inode *dir = dentry->d_inode;
1718 struct nfs4_readdir_arg args = {
1719 .fh = NFS_FH(dir),
1720 .pages = &page,
1721 .pgbase = 0,
1722 .count = count,
1723 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
1724 };
1725 struct nfs4_readdir_res res;
1726 struct rpc_message msg = {
1727 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
1728 .rpc_argp = &args,
1729 .rpc_resp = &res,
1730 .rpc_cred = cred,
1731 };
1732 int status;
1733
1734 lock_kernel();
1735 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
1736 res.pgbase = args.pgbase;
1737 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1738 if (status == 0)
1739 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
1740 unlock_kernel();
1741 return status;
1742}
1743
1744static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
1745 u64 cookie, struct page *page, unsigned int count, int plus)
1746{
1747 struct nfs4_exception exception = { };
1748 int err;
1749 do {
1750 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
1751 _nfs4_proc_readdir(dentry, cred, cookie,
1752 page, count, plus),
1753 &exception);
1754 } while (exception.retry);
1755 return err;
1756}
1757
1758static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1759 struct iattr *sattr, dev_t rdev)
1760{
1761 struct nfs_server *server = NFS_SERVER(dir);
1762 struct nfs_fh fh;
1763 struct nfs_fattr fattr;
1764 struct nfs4_create_arg arg = {
1765 .dir_fh = NFS_FH(dir),
1766 .server = server,
1767 .name = &dentry->d_name,
1768 .attrs = sattr,
1769 .bitmask = server->attr_bitmask,
1770 };
1771 struct nfs4_create_res res = {
1772 .server = server,
1773 .fh = &fh,
1774 .fattr = &fattr,
1775 };
1776 struct rpc_message msg = {
1777 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
1778 .rpc_argp = &arg,
1779 .rpc_resp = &res,
1780 };
1781 int status;
1782 int mode = sattr->ia_mode;
1783
1784 fattr.valid = 0;
1785
1786 BUG_ON(!(sattr->ia_valid & ATTR_MODE));
1787 BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
1788 if (S_ISFIFO(mode))
1789 arg.ftype = NF4FIFO;
1790 else if (S_ISBLK(mode)) {
1791 arg.ftype = NF4BLK;
1792 arg.u.device.specdata1 = MAJOR(rdev);
1793 arg.u.device.specdata2 = MINOR(rdev);
1794 }
1795 else if (S_ISCHR(mode)) {
1796 arg.ftype = NF4CHR;
1797 arg.u.device.specdata1 = MAJOR(rdev);
1798 arg.u.device.specdata2 = MINOR(rdev);
1799 }
1800 else
1801 arg.ftype = NF4SOCK;
1802
1803 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1804 if (status == 0) {
1805 update_changeattr(dir, &res.dir_cinfo);
1806 status = nfs_instantiate(dentry, &fh, &fattr);
1807 }
1808 return status;
1809}
1810
1811static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1812 struct iattr *sattr, dev_t rdev)
1813{
1814 struct nfs4_exception exception = { };
1815 int err;
1816 do {
1817 err = nfs4_handle_exception(NFS_SERVER(dir),
1818 _nfs4_proc_mknod(dir, dentry, sattr, rdev),
1819 &exception);
1820 } while (exception.retry);
1821 return err;
1822}
1823
1824static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
1825 struct nfs_fsstat *fsstat)
1826{
1827 struct nfs4_statfs_arg args = {
1828 .fh = fhandle,
1829 .bitmask = server->attr_bitmask,
1830 };
1831 struct rpc_message msg = {
1832 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
1833 .rpc_argp = &args,
1834 .rpc_resp = fsstat,
1835 };
1836
1837 fsstat->fattr->valid = 0;
1838 return rpc_call_sync(server->client, &msg, 0);
1839}
1840
1841static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
1842{
1843 struct nfs4_exception exception = { };
1844 int err;
1845 do {
1846 err = nfs4_handle_exception(server,
1847 _nfs4_proc_statfs(server, fhandle, fsstat),
1848 &exception);
1849 } while (exception.retry);
1850 return err;
1851}
1852
1853static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
1854 struct nfs_fsinfo *fsinfo)
1855{
1856 struct nfs4_fsinfo_arg args = {
1857 .fh = fhandle,
1858 .bitmask = server->attr_bitmask,
1859 };
1860 struct rpc_message msg = {
1861 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
1862 .rpc_argp = &args,
1863 .rpc_resp = fsinfo,
1864 };
1865
1866 return rpc_call_sync(server->client, &msg, 0);
1867}
1868
1869static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
1870{
1871 struct nfs4_exception exception = { };
1872 int err;
1873
1874 do {
1875 err = nfs4_handle_exception(server,
1876 _nfs4_do_fsinfo(server, fhandle, fsinfo),
1877 &exception);
1878 } while (exception.retry);
1879 return err;
1880}
1881
1882static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
1883{
1884 fsinfo->fattr->valid = 0;
1885 return nfs4_do_fsinfo(server, fhandle, fsinfo);
1886}
1887
1888static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
1889 struct nfs_pathconf *pathconf)
1890{
1891 struct nfs4_pathconf_arg args = {
1892 .fh = fhandle,
1893 .bitmask = server->attr_bitmask,
1894 };
1895 struct rpc_message msg = {
1896 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
1897 .rpc_argp = &args,
1898 .rpc_resp = pathconf,
1899 };
1900
1901 /* None of the pathconf attributes are mandatory to implement */
1902 if ((args.bitmask[0] & nfs4_pathconf_bitmap[0]) == 0) {
1903 memset(pathconf, 0, sizeof(*pathconf));
1904 return 0;
1905 }
1906
1907 pathconf->fattr->valid = 0;
1908 return rpc_call_sync(server->client, &msg, 0);
1909}
1910
1911static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
1912 struct nfs_pathconf *pathconf)
1913{
1914 struct nfs4_exception exception = { };
1915 int err;
1916
1917 do {
1918 err = nfs4_handle_exception(server,
1919 _nfs4_proc_pathconf(server, fhandle, pathconf),
1920 &exception);
1921 } while (exception.retry);
1922 return err;
1923}
1924
1925static void
1926nfs4_read_done(struct rpc_task *task)
1927{
1928 struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
1929 struct inode *inode = data->inode;
1930
1931 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
1932 rpc_restart_call(task);
1933 return;
1934 }
1935 if (task->tk_status > 0)
1936 renew_lease(NFS_SERVER(inode), data->timestamp);
1937 /* Call back common NFS readpage processing */
1938 nfs_readpage_result(task);
1939}
1940
1941static void
1942nfs4_proc_read_setup(struct nfs_read_data *data)
1943{
1944 struct rpc_task *task = &data->task;
1945 struct rpc_message msg = {
1946 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
1947 .rpc_argp = &data->args,
1948 .rpc_resp = &data->res,
1949 .rpc_cred = data->cred,
1950 };
1951 struct inode *inode = data->inode;
1952 int flags;
1953
1954 data->timestamp = jiffies;
1955
1956 /* N.B. Do we need to test? Never called for swapfile inode */
1957 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
1958
1959 /* Finalize the task. */
1960 rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
1961 rpc_call_setup(task, &msg, 0);
1962}
1963
1964static void
1965nfs4_write_done(struct rpc_task *task)
1966{
1967 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
1968 struct inode *inode = data->inode;
1969
1970 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
1971 rpc_restart_call(task);
1972 return;
1973 }
1974 if (task->tk_status >= 0)
1975 renew_lease(NFS_SERVER(inode), data->timestamp);
1976 /* Call back common NFS writeback processing */
1977 nfs_writeback_done(task);
1978}
1979
1980static void
1981nfs4_proc_write_setup(struct nfs_write_data *data, int how)
1982{
1983 struct rpc_task *task = &data->task;
1984 struct rpc_message msg = {
1985 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
1986 .rpc_argp = &data->args,
1987 .rpc_resp = &data->res,
1988 .rpc_cred = data->cred,
1989 };
1990 struct inode *inode = data->inode;
1991 int stable;
1992 int flags;
1993
1994 if (how & FLUSH_STABLE) {
1995 if (!NFS_I(inode)->ncommit)
1996 stable = NFS_FILE_SYNC;
1997 else
1998 stable = NFS_DATA_SYNC;
1999 } else
2000 stable = NFS_UNSTABLE;
2001 data->args.stable = stable;
2002
2003 data->timestamp = jiffies;
2004
2005 /* Set the initial flags for the task. */
2006 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
2007
2008 /* Finalize the task. */
2009 rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
2010 rpc_call_setup(task, &msg, 0);
2011}
2012
2013static void
2014nfs4_commit_done(struct rpc_task *task)
2015{
2016 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
2017 struct inode *inode = data->inode;
2018
2019 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
2020 rpc_restart_call(task);
2021 return;
2022 }
2023 /* Call back common NFS writeback processing */
2024 nfs_commit_done(task);
2025}
2026
2027static void
2028nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2029{
2030 struct rpc_task *task = &data->task;
2031 struct rpc_message msg = {
2032 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
2033 .rpc_argp = &data->args,
2034 .rpc_resp = &data->res,
2035 .rpc_cred = data->cred,
2036 };
2037 struct inode *inode = data->inode;
2038 int flags;
2039
2040 /* Set the initial flags for the task. */
2041 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
2042
2043 /* Finalize the task. */
2044 rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
2045 rpc_call_setup(task, &msg, 0);
2046}
2047
/*
 * renew_done() is the completion callback used by nfs4_proc_async_renew().
 * nfs4_proc_async_renew() is not one of the nfs_rpc_ops; it is a special
 * standalone procedure for queueing an asynchronous RENEW.
 */
2052static void
2053renew_done(struct rpc_task *task)
2054{
2055 struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
2056 unsigned long timestamp = (unsigned long)task->tk_calldata;
2057
2058 if (task->tk_status < 0) {
2059 switch (task->tk_status) {
2060 case -NFS4ERR_STALE_CLIENTID:
2061 case -NFS4ERR_EXPIRED:
2062 case -NFS4ERR_CB_PATH_DOWN:
2063 nfs4_schedule_state_recovery(clp);
2064 }
2065 return;
2066 }
2067 spin_lock(&clp->cl_lock);
2068 if (time_before(clp->cl_last_renewal,timestamp))
2069 clp->cl_last_renewal = timestamp;
2070 spin_unlock(&clp->cl_lock);
2071}
2072
2073int
2074nfs4_proc_async_renew(struct nfs4_client *clp)
2075{
2076 struct rpc_message msg = {
2077 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
2078 .rpc_argp = clp,
2079 .rpc_cred = clp->cl_cred,
2080 };
2081
2082 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
2083 renew_done, (void *)jiffies);
2084}
2085
2086int
2087nfs4_proc_renew(struct nfs4_client *clp)
2088{
2089 struct rpc_message msg = {
2090 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
2091 .rpc_argp = clp,
2092 .rpc_cred = clp->cl_cred,
2093 };
2094 unsigned long now = jiffies;
2095 int status;
2096
2097 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
2098 if (status < 0)
2099 return status;
2100 spin_lock(&clp->cl_lock);
2101 if (time_before(clp->cl_last_renewal,now))
2102 clp->cl_last_renewal = now;
2103 spin_unlock(&clp->cl_lock);
2104 return 0;
2105}
2106
2107/*
2108 * We will need to arrange for the VFS layer to provide an atomic open.
2109 * Until then, this open method is prone to inefficiency and race conditions
2110 * due to the lookup, potential create, and open VFS calls from sys_open()
2111 * placed on the wire.
2112 */
2113static int
2114nfs4_proc_file_open(struct inode *inode, struct file *filp)
2115{
2116 struct dentry *dentry = filp->f_dentry;
2117 struct nfs_open_context *ctx;
2118 struct nfs4_state *state = NULL;
2119 struct rpc_cred *cred;
2120 int status = -ENOMEM;
2121
2122 dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
2123 (int)dentry->d_parent->d_name.len,
2124 dentry->d_parent->d_name.name,
2125 (int)dentry->d_name.len, dentry->d_name.name);
2126
2127
2128 /* Find our open stateid */
2129 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
2130 if (IS_ERR(cred))
2131 return PTR_ERR(cred);
2132 ctx = alloc_nfs_open_context(dentry, cred);
2133 put_rpccred(cred);
2134 if (unlikely(ctx == NULL))
2135 return -ENOMEM;
2136 status = -EIO; /* ERACE actually */
2137 state = nfs4_find_state(inode, cred, filp->f_mode);
2138 if (unlikely(state == NULL))
2139 goto no_state;
2140 ctx->state = state;
2141 nfs4_close_state(state, filp->f_mode);
2142 ctx->mode = filp->f_mode;
2143 nfs_file_set_open_context(filp, ctx);
2144 put_nfs_open_context(ctx);
2145 if (filp->f_mode & FMODE_WRITE)
2146 nfs_begin_data_update(inode);
2147 return 0;
2148no_state:
2149 printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
2150 put_nfs_open_context(ctx);
2151 return status;
2152}
2153
2154/*
2155 * Release our state
2156 */
2157static int
2158nfs4_proc_file_release(struct inode *inode, struct file *filp)
2159{
2160 if (filp->f_mode & FMODE_WRITE)
2161 nfs_end_data_update(inode);
2162 nfs_file_clear_open_context(filp);
2163 return 0;
2164}
2165
2166static int
2167nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
2168{
2169 struct nfs4_client *clp = server->nfs4_state;
2170
2171 if (!clp || task->tk_status >= 0)
2172 return 0;
2173 switch(task->tk_status) {
2174 case -NFS4ERR_STALE_CLIENTID:
2175 case -NFS4ERR_STALE_STATEID:
2176 case -NFS4ERR_EXPIRED:
2177 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
2178 nfs4_schedule_state_recovery(clp);
2179 if (test_bit(NFS4CLNT_OK, &clp->cl_state))
2180 rpc_wake_up_task(task);
2181 task->tk_status = 0;
2182 return -EAGAIN;
2183 case -NFS4ERR_GRACE:
2184 case -NFS4ERR_DELAY:
2185 rpc_delay(task, NFS4_POLL_RETRY_MAX);
2186 task->tk_status = 0;
2187 return -EAGAIN;
2188 case -NFS4ERR_OLD_STATEID:
2189 task->tk_status = 0;
2190 return -EAGAIN;
2191 }
2192 task->tk_status = nfs4_map_errors(task->tk_status);
2193 return 0;
2194}
2195
2196static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
2197{
2198 DEFINE_WAIT(wait);
2199 sigset_t oldset;
2200 int interruptible, res = 0;
2201
2202 might_sleep();
2203
2204 rpc_clnt_sigmask(clnt, &oldset);
2205 interruptible = TASK_UNINTERRUPTIBLE;
2206 if (clnt->cl_intr)
2207 interruptible = TASK_INTERRUPTIBLE;
2208 prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
2209 nfs4_schedule_state_recovery(clp);
2210 if (clnt->cl_intr && signalled())
2211 res = -ERESTARTSYS;
2212 else if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
2213 schedule();
2214 finish_wait(&clp->cl_waitq, &wait);
2215 rpc_clnt_sigunmask(clnt, &oldset);
2216 return res;
2217}
2218
2219static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2220{
2221 sigset_t oldset;
2222 int res = 0;
2223
2224 might_sleep();
2225
2226 if (*timeout <= 0)
2227 *timeout = NFS4_POLL_RETRY_MIN;
2228 if (*timeout > NFS4_POLL_RETRY_MAX)
2229 *timeout = NFS4_POLL_RETRY_MAX;
2230 rpc_clnt_sigmask(clnt, &oldset);
2231 if (clnt->cl_intr) {
2232 set_current_state(TASK_INTERRUPTIBLE);
2233 schedule_timeout(*timeout);
2234 if (signalled())
2235 res = -ERESTARTSYS;
2236 } else {
2237 set_current_state(TASK_UNINTERRUPTIBLE);
2238 schedule_timeout(*timeout);
2239 }
2240 rpc_clnt_sigunmask(clnt, &oldset);
2241 *timeout <<= 1;
2242 return res;
2243}
2244
2245/* This is the error handling routine for processes that are allowed
2246 * to sleep.
2247 */
2248int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2249{
2250 struct nfs4_client *clp = server->nfs4_state;
2251 int ret = errorcode;
2252
2253 exception->retry = 0;
2254 switch(errorcode) {
2255 case 0:
2256 return 0;
2257 case -NFS4ERR_STALE_CLIENTID:
2258 case -NFS4ERR_STALE_STATEID:
2259 case -NFS4ERR_EXPIRED:
2260 ret = nfs4_wait_clnt_recover(server->client, clp);
2261 if (ret == 0)
2262 exception->retry = 1;
2263 break;
2264 case -NFS4ERR_GRACE:
2265 case -NFS4ERR_DELAY:
2266 ret = nfs4_delay(server->client, &exception->timeout);
2267 if (ret == 0)
2268 exception->retry = 1;
2269 break;
2270 case -NFS4ERR_OLD_STATEID:
2271 if (ret == 0)
2272 exception->retry = 1;
2273 }
2274 /* We failed to handle the error */
2275 return nfs4_map_errors(ret);
2276}
2277
2278int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port)
2279{
2280 nfs4_verifier sc_verifier;
2281 struct nfs4_setclientid setclientid = {
2282 .sc_verifier = &sc_verifier,
2283 .sc_prog = program,
2284 };
2285 struct rpc_message msg = {
2286 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
2287 .rpc_argp = &setclientid,
2288 .rpc_resp = clp,
2289 .rpc_cred = clp->cl_cred,
2290 };
2291 u32 *p;
2292 int loop = 0;
2293 int status;
2294
2295 p = (u32*)sc_verifier.data;
2296 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
2297 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
2298
2299 for(;;) {
2300 setclientid.sc_name_len = scnprintf(setclientid.sc_name,
2301 sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
2302 clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
2303 clp->cl_cred->cr_ops->cr_name,
2304 clp->cl_id_uniquifier);
2305 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
2306 sizeof(setclientid.sc_netid), "tcp");
2307 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
2308 sizeof(setclientid.sc_uaddr), "%s.%d.%d",
2309 clp->cl_ipaddr, port >> 8, port & 255);
2310
2311 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
2312 if (status != -NFS4ERR_CLID_INUSE)
2313 break;
2314 if (signalled())
2315 break;
2316 if (loop++ & 1)
2317 ssleep(clp->cl_lease_time + 1);
2318 else
2319 if (++clp->cl_id_uniquifier == 0)
2320 break;
2321 }
2322 return status;
2323}
2324
2325int
2326nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
2327{
2328 struct nfs_fsinfo fsinfo;
2329 struct rpc_message msg = {
2330 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
2331 .rpc_argp = clp,
2332 .rpc_resp = &fsinfo,
2333 .rpc_cred = clp->cl_cred,
2334 };
2335 unsigned long now;
2336 int status;
2337
2338 now = jiffies;
2339 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
2340 if (status == 0) {
2341 spin_lock(&clp->cl_lock);
2342 clp->cl_lease_time = fsinfo.lease_time * HZ;
2343 clp->cl_last_renewal = now;
2344 spin_unlock(&clp->cl_lock);
2345 }
2346 return status;
2347}
2348
2349static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
2350{
2351 struct nfs4_delegreturnargs args = {
2352 .fhandle = NFS_FH(inode),
2353 .stateid = stateid,
2354 };
2355 struct rpc_message msg = {
2356 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
2357 .rpc_argp = &args,
2358 .rpc_cred = cred,
2359 };
2360
2361 return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
2362}
2363
2364int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
2365{
2366 struct nfs_server *server = NFS_SERVER(inode);
2367 struct nfs4_exception exception = { };
2368 int err;
2369 do {
2370 err = _nfs4_proc_delegreturn(inode, cred, stateid);
2371 switch (err) {
2372 case -NFS4ERR_STALE_STATEID:
2373 case -NFS4ERR_EXPIRED:
2374 nfs4_schedule_state_recovery(server->nfs4_state);
2375 case 0:
2376 return 0;
2377 }
2378 err = nfs4_handle_exception(server, err, &exception);
2379 } while (exception.retry);
2380 return err;
2381}
2382
2383#define NFS4_LOCK_MINTIMEOUT (1 * HZ)
2384#define NFS4_LOCK_MAXTIMEOUT (30 * HZ)
2385
2386/*
2387 * sleep, with exponential backoff, and retry the LOCK operation.
2388 */
2389static unsigned long
2390nfs4_set_lock_task_retry(unsigned long timeout)
2391{
2392 current->state = TASK_INTERRUPTIBLE;
2393 schedule_timeout(timeout);
2394 timeout <<= 1;
2395 if (timeout > NFS4_LOCK_MAXTIMEOUT)
2396 return NFS4_LOCK_MAXTIMEOUT;
2397 return timeout;
2398}
2399
2400static inline int
2401nfs4_lck_type(int cmd, struct file_lock *request)
2402{
2403 /* set lock type */
2404 switch (request->fl_type) {
2405 case F_RDLCK:
2406 return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT;
2407 case F_WRLCK:
2408 return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
2409 case F_UNLCK:
2410 return NFS4_WRITE_LT;
2411 }
2412 BUG();
2413 return 0;
2414}
2415
2416static inline uint64_t
2417nfs4_lck_length(struct file_lock *request)
2418{
2419 if (request->fl_end == OFFSET_MAX)
2420 return ~(uint64_t)0;
2421 return request->fl_end - request->fl_start + 1;
2422}
2423
2424static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
2425{
2426 struct inode *inode = state->inode;
2427 struct nfs_server *server = NFS_SERVER(inode);
2428 struct nfs4_client *clp = server->nfs4_state;
2429 struct nfs_lockargs arg = {
2430 .fh = NFS_FH(inode),
2431 .type = nfs4_lck_type(cmd, request),
2432 .offset = request->fl_start,
2433 .length = nfs4_lck_length(request),
2434 };
2435 struct nfs_lockres res = {
2436 .server = server,
2437 };
2438 struct rpc_message msg = {
2439 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
2440 .rpc_argp = &arg,
2441 .rpc_resp = &res,
2442 .rpc_cred = state->owner->so_cred,
2443 };
2444 struct nfs_lowner nlo;
2445 struct nfs4_lock_state *lsp;
2446 int status;
2447
2448 down_read(&clp->cl_sem);
2449 nlo.clientid = clp->cl_clientid;
2450 down(&state->lock_sema);
2451 lsp = nfs4_find_lock_state(state, request->fl_owner);
2452 if (lsp)
2453 nlo.id = lsp->ls_id;
2454 else {
2455 spin_lock(&clp->cl_lock);
2456 nlo.id = nfs4_alloc_lockowner_id(clp);
2457 spin_unlock(&clp->cl_lock);
2458 }
2459 arg.u.lockt = &nlo;
2460 status = rpc_call_sync(server->client, &msg, 0);
2461 if (!status) {
2462 request->fl_type = F_UNLCK;
2463 } else if (status == -NFS4ERR_DENIED) {
2464 int64_t len, start, end;
2465 start = res.u.denied.offset;
2466 len = res.u.denied.length;
2467 end = start + len - 1;
2468 if (end < 0 || len == 0)
2469 request->fl_end = OFFSET_MAX;
2470 else
2471 request->fl_end = (loff_t)end;
2472 request->fl_start = (loff_t)start;
2473 request->fl_type = F_WRLCK;
2474 if (res.u.denied.type & 1)
2475 request->fl_type = F_RDLCK;
2476 request->fl_pid = 0;
2477 status = 0;
2478 }
2479 if (lsp)
2480 nfs4_put_lock_state(lsp);
2481 up(&state->lock_sema);
2482 up_read(&clp->cl_sem);
2483 return status;
2484}
2485
2486static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
2487{
2488 struct nfs4_exception exception = { };
2489 int err;
2490
2491 do {
2492 err = nfs4_handle_exception(NFS_SERVER(state->inode),
2493 _nfs4_proc_getlk(state, cmd, request),
2494 &exception);
2495 } while (exception.retry);
2496 return err;
2497}
2498
2499static int do_vfs_lock(struct file *file, struct file_lock *fl)
2500{
2501 int res = 0;
2502 switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
2503 case FL_POSIX:
2504 res = posix_lock_file_wait(file, fl);
2505 break;
2506 case FL_FLOCK:
2507 res = flock_lock_file_wait(file, fl);
2508 break;
2509 default:
2510 BUG();
2511 }
2512 if (res < 0)
2513 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
2514 __FUNCTION__);
2515 return res;
2516}
2517
2518static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
2519{
2520 struct inode *inode = state->inode;
2521 struct nfs_server *server = NFS_SERVER(inode);
2522 struct nfs4_client *clp = server->nfs4_state;
2523 struct nfs_lockargs arg = {
2524 .fh = NFS_FH(inode),
2525 .type = nfs4_lck_type(cmd, request),
2526 .offset = request->fl_start,
2527 .length = nfs4_lck_length(request),
2528 };
2529 struct nfs_lockres res = {
2530 .server = server,
2531 };
2532 struct rpc_message msg = {
2533 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
2534 .rpc_argp = &arg,
2535 .rpc_resp = &res,
2536 .rpc_cred = state->owner->so_cred,
2537 };
2538 struct nfs4_lock_state *lsp;
2539 struct nfs_locku_opargs luargs;
2540 int status = 0;
2541
2542 down_read(&clp->cl_sem);
2543 down(&state->lock_sema);
2544 lsp = nfs4_find_lock_state(state, request->fl_owner);
2545 if (!lsp)
2546 goto out;
2547 /* We might have lost the locks! */
2548 if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
2549 luargs.seqid = lsp->ls_seqid;
2550 memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
2551 arg.u.locku = &luargs;
2552 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
2553 nfs4_increment_lock_seqid(status, lsp);
2554 }
2555
2556 if (status == 0) {
2557 memcpy(&lsp->ls_stateid, &res.u.stateid,
2558 sizeof(lsp->ls_stateid));
2559 nfs4_notify_unlck(state, request, lsp);
2560 }
2561 nfs4_put_lock_state(lsp);
2562out:
2563 up(&state->lock_sema);
2564 if (status == 0)
2565 do_vfs_lock(request->fl_file, request);
2566 up_read(&clp->cl_sem);
2567 return status;
2568}
2569
2570static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
2571{
2572 struct nfs4_exception exception = { };
2573 int err;
2574
2575 do {
2576 err = nfs4_handle_exception(NFS_SERVER(state->inode),
2577 _nfs4_proc_unlck(state, cmd, request),
2578 &exception);
2579 } while (exception.retry);
2580 return err;
2581}
2582
2583static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
2584{
2585 struct inode *inode = state->inode;
2586 struct nfs_server *server = NFS_SERVER(inode);
2587 struct nfs4_lock_state *lsp;
2588 struct nfs_lockargs arg = {
2589 .fh = NFS_FH(inode),
2590 .type = nfs4_lck_type(cmd, request),
2591 .offset = request->fl_start,
2592 .length = nfs4_lck_length(request),
2593 };
2594 struct nfs_lockres res = {
2595 .server = server,
2596 };
2597 struct rpc_message msg = {
2598 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
2599 .rpc_argp = &arg,
2600 .rpc_resp = &res,
2601 .rpc_cred = state->owner->so_cred,
2602 };
2603 struct nfs_lock_opargs largs = {
2604 .reclaim = reclaim,
2605 .new_lock_owner = 0,
2606 };
2607 int status;
2608
2609 lsp = nfs4_get_lock_state(state, request->fl_owner);
2610 if (lsp == NULL)
2611 return -ENOMEM;
2612 if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
2613 struct nfs4_state_owner *owner = state->owner;
2614 struct nfs_open_to_lock otl = {
2615 .lock_owner = {
2616 .clientid = server->nfs4_state->cl_clientid,
2617 },
2618 };
2619
2620 otl.lock_seqid = lsp->ls_seqid;
2621 otl.lock_owner.id = lsp->ls_id;
2622 memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
2623 largs.u.open_lock = &otl;
2624 largs.new_lock_owner = 1;
2625 arg.u.lock = &largs;
2626 down(&owner->so_sema);
2627 otl.open_seqid = owner->so_seqid;
2628 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
2629 /* increment open_owner seqid on success, and
2630 * seqid mutating errors */
2631 nfs4_increment_seqid(status, owner);
2632 up(&owner->so_sema);
2633 } else {
2634 struct nfs_exist_lock el = {
2635 .seqid = lsp->ls_seqid,
2636 };
2637 memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
2638 largs.u.exist_lock = &el;
2639 largs.new_lock_owner = 0;
2640 arg.u.lock = &largs;
2641 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
2642 }
2643 /* increment seqid on success, and * seqid mutating errors*/
2644 nfs4_increment_lock_seqid(status, lsp);
2645 /* save the returned stateid. */
2646 if (status == 0) {
2647 memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
2648 lsp->ls_flags |= NFS_LOCK_INITIALIZED;
2649 if (!reclaim)
2650 nfs4_notify_setlk(state, request, lsp);
2651 } else if (status == -NFS4ERR_DENIED)
2652 status = -EAGAIN;
2653 nfs4_put_lock_state(lsp);
2654 return status;
2655}
2656
2657static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
2658{
2659 return _nfs4_do_setlk(state, F_SETLK, request, 1);
2660}
2661
2662static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
2663{
2664 return _nfs4_do_setlk(state, F_SETLK, request, 0);
2665}
2666
2667static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
2668{
2669 struct nfs4_client *clp = state->owner->so_client;
2670 int status;
2671
2672 down_read(&clp->cl_sem);
2673 down(&state->lock_sema);
2674 status = _nfs4_do_setlk(state, cmd, request, 0);
2675 up(&state->lock_sema);
2676 if (status == 0) {
2677 /* Note: we always want to sleep here! */
2678 request->fl_flags |= FL_SLEEP;
2679 if (do_vfs_lock(request->fl_file, request) < 0)
2680 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
2681 }
2682 up_read(&clp->cl_sem);
2683 return status;
2684}
2685
2686static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
2687{
2688 struct nfs4_exception exception = { };
2689 int err;
2690
2691 do {
2692 err = nfs4_handle_exception(NFS_SERVER(state->inode),
2693 _nfs4_proc_setlk(state, cmd, request),
2694 &exception);
2695 } while (exception.retry);
2696 return err;
2697}
2698
2699static int
2700nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
2701{
2702 struct nfs_open_context *ctx;
2703 struct nfs4_state *state;
2704 unsigned long timeout = NFS4_LOCK_MINTIMEOUT;
2705 int status;
2706
2707 /* verify open state */
2708 ctx = (struct nfs_open_context *)filp->private_data;
2709 state = ctx->state;
2710
2711 if (request->fl_start < 0 || request->fl_end < 0)
2712 return -EINVAL;
2713
2714 if (IS_GETLK(cmd))
2715 return nfs4_proc_getlk(state, F_GETLK, request);
2716
2717 if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
2718 return -EINVAL;
2719
2720 if (request->fl_type == F_UNLCK)
2721 return nfs4_proc_unlck(state, cmd, request);
2722
2723 do {
2724 status = nfs4_proc_setlk(state, cmd, request);
2725 if ((status != -EAGAIN) || IS_SETLK(cmd))
2726 break;
2727 timeout = nfs4_set_lock_task_retry(timeout);
2728 status = -ERESTARTSYS;
2729 if (signalled())
2730 break;
2731 } while(status < 0);
2732
2733 return status;
2734}
2735
2736struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
2737 .recover_open = nfs4_open_reclaim,
2738 .recover_lock = nfs4_lock_reclaim,
2739};
2740
2741struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
2742 .recover_open = nfs4_open_expired,
2743 .recover_lock = nfs4_lock_expired,
2744};
2745
2746struct nfs_rpc_ops nfs_v4_clientops = {
2747 .version = 4, /* protocol version */
2748 .dentry_ops = &nfs4_dentry_operations,
2749 .dir_inode_ops = &nfs4_dir_inode_operations,
2750 .getroot = nfs4_proc_get_root,
2751 .getattr = nfs4_proc_getattr,
2752 .setattr = nfs4_proc_setattr,
2753 .lookup = nfs4_proc_lookup,
2754 .access = nfs4_proc_access,
2755 .readlink = nfs4_proc_readlink,
2756 .read = nfs4_proc_read,
2757 .write = nfs4_proc_write,
2758 .commit = nfs4_proc_commit,
2759 .create = nfs4_proc_create,
2760 .remove = nfs4_proc_remove,
2761 .unlink_setup = nfs4_proc_unlink_setup,
2762 .unlink_done = nfs4_proc_unlink_done,
2763 .rename = nfs4_proc_rename,
2764 .link = nfs4_proc_link,
2765 .symlink = nfs4_proc_symlink,
2766 .mkdir = nfs4_proc_mkdir,
2767 .rmdir = nfs4_proc_remove,
2768 .readdir = nfs4_proc_readdir,
2769 .mknod = nfs4_proc_mknod,
2770 .statfs = nfs4_proc_statfs,
2771 .fsinfo = nfs4_proc_fsinfo,
2772 .pathconf = nfs4_proc_pathconf,
2773 .decode_dirent = nfs4_decode_dirent,
2774 .read_setup = nfs4_proc_read_setup,
2775 .write_setup = nfs4_proc_write_setup,
2776 .commit_setup = nfs4_proc_commit_setup,
2777 .file_open = nfs4_proc_file_open,
2778 .file_release = nfs4_proc_file_release,
2779 .lock = nfs4_proc_lock,
2780};
2781
2782/*
2783 * Local variables:
2784 * c-basic-offset: 8
2785 * End:
2786 */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 000000000000..667e06f1c647
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,148 @@
1/*
2 * fs/nfs/nfs4renewd.c
3 *
4 * Copyright (c) 2002 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Kendrick Smith <kmsmith@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
 * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
 * send a RENEW, to keep state alive on the server. The daemon is implemented
 * as delayed work (clp->cl_renewd) queued with schedule_delayed_work(), not
 * as a real kernel thread. There is one renewd per nfs4_client.
38 *
39 * TODO: If the send queue gets backlogged (e.g., if the server goes down),
40 * we will keep filling the queue with periodic RENEW requests. We need a
41 * mechanism for ensuring that if renewd successfully sends off a request,
42 * then it only wakes up when the request is finished. Maybe use the
43 * child task framework of the RPC layer?
44 */
45
46#include <linux/sched.h>
47#include <linux/smp_lock.h>
48#include <linux/mm.h>
49#include <linux/pagemap.h>
50#include <linux/sunrpc/sched.h>
51#include <linux/sunrpc/clnt.h>
52
53#include <linux/nfs.h>
54#include <linux/nfs4.h>
55#include <linux/nfs_fs.h>
56
57#define NFSDBG_FACILITY NFSDBG_PROC
58
59void
60nfs4_renew_state(void *data)
61{
62 struct nfs4_client *clp = (struct nfs4_client *)data;
63 long lease, timeout;
64 unsigned long last, now;
65
66 down_read(&clp->cl_sem);
67 dprintk("%s: start\n", __FUNCTION__);
68 /* Are there any active superblocks? */
69 if (list_empty(&clp->cl_superblocks))
70 goto out;
71 spin_lock(&clp->cl_lock);
72 lease = clp->cl_lease_time;
73 last = clp->cl_last_renewal;
74 now = jiffies;
75 timeout = (2 * lease) / 3 + (long)last - (long)now;
76 /* Are we close to a lease timeout? */
77 if (time_after(now, last + lease/3)) {
78 spin_unlock(&clp->cl_lock);
79 /* Queue an asynchronous RENEW. */
80 nfs4_proc_async_renew(clp);
81 timeout = (2 * lease) / 3;
82 spin_lock(&clp->cl_lock);
83 } else
84 dprintk("%s: failed to call renewd. Reason: lease not expired \n",
85 __FUNCTION__);
86 if (timeout < 5 * HZ) /* safeguard */
87 timeout = 5 * HZ;
88 dprintk("%s: requeueing work. Lease period = %ld\n",
89 __FUNCTION__, (timeout + HZ - 1) / HZ);
90 cancel_delayed_work(&clp->cl_renewd);
91 schedule_delayed_work(&clp->cl_renewd, timeout);
92 spin_unlock(&clp->cl_lock);
93out:
94 up_read(&clp->cl_sem);
95 dprintk("%s: done\n", __FUNCTION__);
96}
97
98/* Must be called with clp->cl_sem locked for writes */
99void
100nfs4_schedule_state_renewal(struct nfs4_client *clp)
101{
102 long timeout;
103
104 spin_lock(&clp->cl_lock);
105 timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
106 - (long)jiffies;
107 if (timeout < 5 * HZ)
108 timeout = 5 * HZ;
109 dprintk("%s: requeueing work. Lease period = %ld\n",
110 __FUNCTION__, (timeout + HZ - 1) / HZ);
111 cancel_delayed_work(&clp->cl_renewd);
112 schedule_delayed_work(&clp->cl_renewd, timeout);
113 spin_unlock(&clp->cl_lock);
114}
115
116void
117nfs4_renewd_prepare_shutdown(struct nfs_server *server)
118{
119 struct nfs4_client *clp = server->nfs4_state;
120
121 if (!clp)
122 return;
123 flush_scheduled_work();
124 down_write(&clp->cl_sem);
125 if (!list_empty(&server->nfs4_siblings))
126 list_del_init(&server->nfs4_siblings);
127 up_write(&clp->cl_sem);
128}
129
/* Takes clp->cl_sem for reading itself, so it must not be called with
 * clp->cl_sem already held for writing */
131void
132nfs4_kill_renewd(struct nfs4_client *clp)
133{
134 down_read(&clp->cl_sem);
135 if (!list_empty(&clp->cl_superblocks)) {
136 up_read(&clp->cl_sem);
137 return;
138 }
139 cancel_delayed_work(&clp->cl_renewd);
140 up_read(&clp->cl_sem);
141 flush_scheduled_work();
142}
143
144/*
145 * Local variables:
146 * c-basic-offset: 8
147 * End:
148 */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 000000000000..231cebce3c87
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,932 @@
1/*
2 * fs/nfs/nfs4state.c
3 *
4 * Client-side state management for NFSv4.
5 *
6 * Copyright (c) 2002 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Kendrick Smith <kmsmith@umich.edu>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 *
36 * Implementation of the NFSv4 state model. For the time being,
37 * this is minimal, but will be made much more complex in a
38 * subsequent patch.
39 */
40
41#include <linux/config.h>
42#include <linux/slab.h>
43#include <linux/smp_lock.h>
44#include <linux/nfs_fs.h>
45#include <linux/nfs_idmap.h>
46#include <linux/workqueue.h>
47#include <linux/bitops.h>
48
49#include "callback.h"
50#include "delegation.h"
51
52#define OPENOWNER_POOL_SIZE 8
53
54static DEFINE_SPINLOCK(state_spinlock);
55
56nfs4_stateid zero_stateid;
57
58#if 0
59nfs4_stateid one_stateid =
60 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
61#endif
62
63static LIST_HEAD(nfs4_clientid_list);
64
65static void nfs4_recover_state(void *);
66extern void nfs4_renew_state(void *);
67
68void
69init_nfsv4_state(struct nfs_server *server)
70{
71 server->nfs4_state = NULL;
72 INIT_LIST_HEAD(&server->nfs4_siblings);
73}
74
75void
76destroy_nfsv4_state(struct nfs_server *server)
77{
78 if (server->mnt_path) {
79 kfree(server->mnt_path);
80 server->mnt_path = NULL;
81 }
82 if (server->nfs4_state) {
83 nfs4_put_client(server->nfs4_state);
84 server->nfs4_state = NULL;
85 }
86}
87
88/*
89 * nfs4_get_client(): returns an empty client structure
90 * nfs4_put_client(): drops reference to client structure
91 *
92 * Since these are allocated/deallocated very rarely, we don't
93 * bother putting them in a slab cache...
94 */
95static struct nfs4_client *
96nfs4_alloc_client(struct in_addr *addr)
97{
98 struct nfs4_client *clp;
99
100 if (nfs_callback_up() < 0)
101 return NULL;
102 if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
103 nfs_callback_down();
104 return NULL;
105 }
106 memset(clp, 0, sizeof(*clp));
107 memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
108 init_rwsem(&clp->cl_sem);
109 INIT_LIST_HEAD(&clp->cl_delegations);
110 INIT_LIST_HEAD(&clp->cl_state_owners);
111 INIT_LIST_HEAD(&clp->cl_unused);
112 spin_lock_init(&clp->cl_lock);
113 atomic_set(&clp->cl_count, 1);
114 INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
115 INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
116 INIT_LIST_HEAD(&clp->cl_superblocks);
117 init_waitqueue_head(&clp->cl_waitq);
118 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
119 clp->cl_boot_time = CURRENT_TIME;
120 clp->cl_state = 1 << NFS4CLNT_OK;
121 return clp;
122}
123
124static void
125nfs4_free_client(struct nfs4_client *clp)
126{
127 struct nfs4_state_owner *sp;
128
129 while (!list_empty(&clp->cl_unused)) {
130 sp = list_entry(clp->cl_unused.next,
131 struct nfs4_state_owner,
132 so_list);
133 list_del(&sp->so_list);
134 kfree(sp);
135 }
136 BUG_ON(!list_empty(&clp->cl_state_owners));
137 if (clp->cl_cred)
138 put_rpccred(clp->cl_cred);
139 nfs_idmap_delete(clp);
140 if (clp->cl_rpcclient)
141 rpc_shutdown_client(clp->cl_rpcclient);
142 kfree(clp);
143 nfs_callback_down();
144}
145
146static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
147{
148 struct nfs4_client *clp;
149 list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
150 if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
151 atomic_inc(&clp->cl_count);
152 return clp;
153 }
154 }
155 return NULL;
156}
157
158struct nfs4_client *nfs4_find_client(struct in_addr *addr)
159{
160 struct nfs4_client *clp;
161 spin_lock(&state_spinlock);
162 clp = __nfs4_find_client(addr);
163 spin_unlock(&state_spinlock);
164 return clp;
165}
166
167struct nfs4_client *
168nfs4_get_client(struct in_addr *addr)
169{
170 struct nfs4_client *clp, *new = NULL;
171
172 spin_lock(&state_spinlock);
173 for (;;) {
174 clp = __nfs4_find_client(addr);
175 if (clp != NULL)
176 break;
177 clp = new;
178 if (clp != NULL) {
179 list_add(&clp->cl_servers, &nfs4_clientid_list);
180 new = NULL;
181 break;
182 }
183 spin_unlock(&state_spinlock);
184 new = nfs4_alloc_client(addr);
185 spin_lock(&state_spinlock);
186 if (new == NULL)
187 break;
188 }
189 spin_unlock(&state_spinlock);
190 if (new)
191 nfs4_free_client(new);
192 return clp;
193}
194
195void
196nfs4_put_client(struct nfs4_client *clp)
197{
198 if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
199 return;
200 list_del(&clp->cl_servers);
201 spin_unlock(&state_spinlock);
202 BUG_ON(!list_empty(&clp->cl_superblocks));
203 wake_up_all(&clp->cl_waitq);
204 rpc_wake_up(&clp->cl_rpcwaitq);
205 nfs4_kill_renewd(clp);
206 nfs4_free_client(clp);
207}
208
209static int __nfs4_init_client(struct nfs4_client *clp)
210{
211 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
212 if (status == 0)
213 status = nfs4_proc_setclientid_confirm(clp);
214 if (status == 0)
215 nfs4_schedule_state_renewal(clp);
216 return status;
217}
218
/*
 * Public wrapper for __nfs4_init_client() that passes the resulting status
 * through nfs4_map_errors() before returning it.
 */
int nfs4_init_client(struct nfs4_client *clp)
{
	int status = __nfs4_init_client(clp);

	return nfs4_map_errors(status);
}
223
224u32
225nfs4_alloc_lockowner_id(struct nfs4_client *clp)
226{
227 return clp->cl_lockowner_id ++;
228}
229
230static struct nfs4_state_owner *
231nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
232{
233 struct nfs4_state_owner *sp = NULL;
234
235 if (!list_empty(&clp->cl_unused)) {
236 sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
237 atomic_inc(&sp->so_count);
238 sp->so_cred = cred;
239 list_move(&sp->so_list, &clp->cl_state_owners);
240 clp->cl_nunused--;
241 }
242 return sp;
243}
244
245static struct nfs4_state_owner *
246nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
247{
248 struct nfs4_state_owner *sp, *res = NULL;
249
250 list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
251 if (sp->so_cred != cred)
252 continue;
253 atomic_inc(&sp->so_count);
254 /* Move to the head of the list */
255 list_move(&sp->so_list, &clp->cl_state_owners);
256 res = sp;
257 break;
258 }
259 return res;
260}
261
262/*
263 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
264 * create a new state_owner.
265 *
266 */
267static struct nfs4_state_owner *
268nfs4_alloc_state_owner(void)
269{
270 struct nfs4_state_owner *sp;
271
272 sp = kmalloc(sizeof(*sp),GFP_KERNEL);
273 if (!sp)
274 return NULL;
275 init_MUTEX(&sp->so_sema);
276 sp->so_seqid = 0; /* arbitrary */
277 INIT_LIST_HEAD(&sp->so_states);
278 INIT_LIST_HEAD(&sp->so_delegations);
279 atomic_set(&sp->so_count, 1);
280 return sp;
281}
282
283void
284nfs4_drop_state_owner(struct nfs4_state_owner *sp)
285{
286 struct nfs4_client *clp = sp->so_client;
287 spin_lock(&clp->cl_lock);
288 list_del_init(&sp->so_list);
289 spin_unlock(&clp->cl_lock);
290}
291
292/*
293 * Note: must be called with clp->cl_sem held in order to prevent races
294 * with reboot recovery!
295 */
296struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
297{
298 struct nfs4_client *clp = server->nfs4_state;
299 struct nfs4_state_owner *sp, *new;
300
301 get_rpccred(cred);
302 new = nfs4_alloc_state_owner();
303 spin_lock(&clp->cl_lock);
304 sp = nfs4_find_state_owner(clp, cred);
305 if (sp == NULL)
306 sp = nfs4_client_grab_unused(clp, cred);
307 if (sp == NULL && new != NULL) {
308 list_add(&new->so_list, &clp->cl_state_owners);
309 new->so_client = clp;
310 new->so_id = nfs4_alloc_lockowner_id(clp);
311 new->so_cred = cred;
312 sp = new;
313 new = NULL;
314 }
315 spin_unlock(&clp->cl_lock);
316 if (new)
317 kfree(new);
318 if (sp != NULL)
319 return sp;
320 put_rpccred(cred);
321 return NULL;
322}
323
324/*
325 * Must be called with clp->cl_sem held in order to avoid races
326 * with state recovery...
327 */
328void nfs4_put_state_owner(struct nfs4_state_owner *sp)
329{
330 struct nfs4_client *clp = sp->so_client;
331 struct rpc_cred *cred = sp->so_cred;
332
333 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
334 return;
335 if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
336 goto out_free;
337 if (list_empty(&sp->so_list))
338 goto out_free;
339 list_move(&sp->so_list, &clp->cl_unused);
340 clp->cl_nunused++;
341 spin_unlock(&clp->cl_lock);
342 put_rpccred(cred);
343 cred = NULL;
344 return;
345out_free:
346 list_del(&sp->so_list);
347 spin_unlock(&clp->cl_lock);
348 put_rpccred(cred);
349 kfree(sp);
350}
351
352static struct nfs4_state *
353nfs4_alloc_open_state(void)
354{
355 struct nfs4_state *state;
356
357 state = kmalloc(sizeof(*state), GFP_KERNEL);
358 if (!state)
359 return NULL;
360 state->state = 0;
361 state->nreaders = 0;
362 state->nwriters = 0;
363 state->flags = 0;
364 memset(state->stateid.data, 0, sizeof(state->stateid.data));
365 atomic_set(&state->count, 1);
366 INIT_LIST_HEAD(&state->lock_states);
367 init_MUTEX(&state->lock_sema);
368 rwlock_init(&state->state_lock);
369 return state;
370}
371
372static struct nfs4_state *
373__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
374{
375 struct nfs_inode *nfsi = NFS_I(inode);
376 struct nfs4_state *state;
377
378 mode &= (FMODE_READ|FMODE_WRITE);
379 list_for_each_entry(state, &nfsi->open_states, inode_states) {
380 if (state->owner->so_cred != cred)
381 continue;
382 if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
383 continue;
384 if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
385 continue;
386 if ((state->state & mode) != mode)
387 continue;
388 atomic_inc(&state->count);
389 if (mode & FMODE_READ)
390 state->nreaders++;
391 if (mode & FMODE_WRITE)
392 state->nwriters++;
393 return state;
394 }
395 return NULL;
396}
397
398static struct nfs4_state *
399__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
400{
401 struct nfs_inode *nfsi = NFS_I(inode);
402 struct nfs4_state *state;
403
404 list_for_each_entry(state, &nfsi->open_states, inode_states) {
405 /* Is this in the process of being freed? */
406 if (state->nreaders == 0 && state->nwriters == 0)
407 continue;
408 if (state->owner == owner) {
409 atomic_inc(&state->count);
410 return state;
411 }
412 }
413 return NULL;
414}
415
416struct nfs4_state *
417nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
418{
419 struct nfs4_state *state;
420
421 spin_lock(&inode->i_lock);
422 state = __nfs4_find_state(inode, cred, mode);
423 spin_unlock(&inode->i_lock);
424 return state;
425}
426
/* Release the memory of an open-state structure. */
static void
nfs4_free_open_state(struct nfs4_state *state)
{
	kfree(state);
}
432
433struct nfs4_state *
434nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
435{
436 struct nfs4_state *state, *new;
437 struct nfs_inode *nfsi = NFS_I(inode);
438
439 spin_lock(&inode->i_lock);
440 state = __nfs4_find_state_byowner(inode, owner);
441 spin_unlock(&inode->i_lock);
442 if (state)
443 goto out;
444 new = nfs4_alloc_open_state();
445 spin_lock(&inode->i_lock);
446 state = __nfs4_find_state_byowner(inode, owner);
447 if (state == NULL && new != NULL) {
448 state = new;
449 /* Caller *must* be holding owner->so_sem */
450 /* Note: The reclaim code dictates that we add stateless
451 * and read-only stateids to the end of the list */
452 list_add_tail(&state->open_states, &owner->so_states);
453 state->owner = owner;
454 atomic_inc(&owner->so_count);
455 list_add(&state->inode_states, &nfsi->open_states);
456 state->inode = igrab(inode);
457 spin_unlock(&inode->i_lock);
458 } else {
459 spin_unlock(&inode->i_lock);
460 if (new)
461 nfs4_free_open_state(new);
462 }
463out:
464 return state;
465}
466
467/*
468 * Beware! Caller must be holding exactly one
469 * reference to clp->cl_sem and owner->so_sema!
470 */
471void nfs4_put_open_state(struct nfs4_state *state)
472{
473 struct inode *inode = state->inode;
474 struct nfs4_state_owner *owner = state->owner;
475
476 if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
477 return;
478 if (!list_empty(&state->inode_states))
479 list_del(&state->inode_states);
480 spin_unlock(&inode->i_lock);
481 list_del(&state->open_states);
482 iput(inode);
483 BUG_ON (state->state != 0);
484 nfs4_free_open_state(state);
485 nfs4_put_state_owner(owner);
486}
487
488/*
489 * Beware! Caller must be holding no references to clp->cl_sem!
490 * of owner->so_sema!
491 */
492void nfs4_close_state(struct nfs4_state *state, mode_t mode)
493{
494 struct inode *inode = state->inode;
495 struct nfs4_state_owner *owner = state->owner;
496 struct nfs4_client *clp = owner->so_client;
497 int newstate;
498
499 atomic_inc(&owner->so_count);
500 down_read(&clp->cl_sem);
501 down(&owner->so_sema);
502 /* Protect against nfs4_find_state() */
503 spin_lock(&inode->i_lock);
504 if (mode & FMODE_READ)
505 state->nreaders--;
506 if (mode & FMODE_WRITE)
507 state->nwriters--;
508 if (state->nwriters == 0) {
509 if (state->nreaders == 0)
510 list_del_init(&state->inode_states);
511 /* See reclaim code */
512 list_move_tail(&state->open_states, &owner->so_states);
513 }
514 spin_unlock(&inode->i_lock);
515 newstate = 0;
516 if (state->state != 0) {
517 if (state->nreaders)
518 newstate |= FMODE_READ;
519 if (state->nwriters)
520 newstate |= FMODE_WRITE;
521 if (state->state == newstate)
522 goto out;
523 if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
524 return;
525 }
526out:
527 nfs4_put_open_state(state);
528 up(&owner->so_sema);
529 nfs4_put_state_owner(owner);
530 up_read(&clp->cl_sem);
531}
532
533/*
534 * Search the state->lock_states for an existing lock_owner
535 * that is compatible with current->files
536 */
537static struct nfs4_lock_state *
538__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
539{
540 struct nfs4_lock_state *pos;
541 list_for_each_entry(pos, &state->lock_states, ls_locks) {
542 if (pos->ls_owner != fl_owner)
543 continue;
544 atomic_inc(&pos->ls_count);
545 return pos;
546 }
547 return NULL;
548}
549
550struct nfs4_lock_state *
551nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
552{
553 struct nfs4_lock_state *lsp;
554 read_lock(&state->state_lock);
555 lsp = __nfs4_find_lock_state(state, fl_owner);
556 read_unlock(&state->state_lock);
557 return lsp;
558}
559
560/*
561 * Return a compatible lock_state. If no initialized lock_state structure
562 * exists, return an uninitialized one.
563 *
564 * The caller must be holding state->lock_sema
565 */
566static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
567{
568 struct nfs4_lock_state *lsp;
569 struct nfs4_client *clp = state->owner->so_client;
570
571 lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
572 if (lsp == NULL)
573 return NULL;
574 lsp->ls_flags = 0;
575 lsp->ls_seqid = 0; /* arbitrary */
576 lsp->ls_id = -1;
577 memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
578 atomic_set(&lsp->ls_count, 1);
579 lsp->ls_owner = fl_owner;
580 INIT_LIST_HEAD(&lsp->ls_locks);
581 spin_lock(&clp->cl_lock);
582 lsp->ls_id = nfs4_alloc_lockowner_id(clp);
583 spin_unlock(&clp->cl_lock);
584 return lsp;
585}
586
587/*
588 * Return a compatible lock_state. If no initialized lock_state structure
589 * exists, return an uninitialized one.
590 *
591 * The caller must be holding state->lock_sema and clp->cl_sem
592 */
593struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
594{
595 struct nfs4_lock_state * lsp;
596
597 lsp = nfs4_find_lock_state(state, owner);
598 if (lsp == NULL)
599 lsp = nfs4_alloc_lock_state(state, owner);
600 return lsp;
601}
602
603/*
604 * Byte-range lock aware utility to initialize the stateid of read/write
605 * requests.
606 */
607void
608nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
609{
610 if (test_bit(LK_STATE_IN_USE, &state->flags)) {
611 struct nfs4_lock_state *lsp;
612
613 lsp = nfs4_find_lock_state(state, fl_owner);
614 if (lsp) {
615 memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
616 nfs4_put_lock_state(lsp);
617 return;
618 }
619 }
620 memcpy(dst, &state->stateid, sizeof(*dst));
621}
622
623/*
624* Called with state->lock_sema and clp->cl_sem held.
625*/
626void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
627{
628 if (status == NFS_OK || seqid_mutating_err(-status))
629 lsp->ls_seqid++;
630}
631
632/*
633* Check to see if the request lock (type FL_UNLK) effects the fl lock.
634*
635* fl and request must have the same posix owner
636*
637* return:
638* 0 -> fl not effected by request
639* 1 -> fl consumed by request
640*/
641
642static int
643nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
644{
645 if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
646 return 1;
647 return 0;
648}
649
650/*
651 * Post an initialized lock_state on the state->lock_states list.
652 */
653void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
654{
655 if (!list_empty(&lsp->ls_locks))
656 return;
657 atomic_inc(&lsp->ls_count);
658 write_lock(&state->state_lock);
659 list_add(&lsp->ls_locks, &state->lock_states);
660 set_bit(LK_STATE_IN_USE, &state->flags);
661 write_unlock(&state->state_lock);
662}
663
664/*
665 * to decide to 'reap' lock state:
666 * 1) search i_flock for file_locks with fl.lock_state = to ls.
667 * 2) determine if unlock will consume found lock.
668 * if so, reap
669 *
670 * else, don't reap.
671 *
672 */
673void
674nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
675{
676 struct inode *inode = state->inode;
677 struct file_lock *fl;
678
679 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
680 if (!(fl->fl_flags & FL_POSIX))
681 continue;
682 if (fl->fl_owner != lsp->ls_owner)
683 continue;
684 /* Exit if we find at least one lock which is not consumed */
685 if (nfs4_check_unlock(fl,request) == 0)
686 return;
687 }
688
689 write_lock(&state->state_lock);
690 list_del_init(&lsp->ls_locks);
691 if (list_empty(&state->lock_states))
692 clear_bit(LK_STATE_IN_USE, &state->flags);
693 write_unlock(&state->state_lock);
694 nfs4_put_lock_state(lsp);
695}
696
697/*
698 * Release reference to lock_state, and free it if we see that
699 * it is no longer in use
700 */
701void
702nfs4_put_lock_state(struct nfs4_lock_state *lsp)
703{
704 if (!atomic_dec_and_test(&lsp->ls_count))
705 return;
706 BUG_ON (!list_empty(&lsp->ls_locks));
707 kfree(lsp);
708}
709
710/*
711* Called with sp->so_sema and clp->cl_sem held.
712*
713* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
714* failed with a seqid incrementing error -
715* see comments nfs_fs.h:seqid_mutating_error()
716*/
717void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
718{
719 if (status == NFS_OK || seqid_mutating_err(-status))
720 sp->so_seqid++;
721 /* If the server returns BAD_SEQID, unhash state_owner here */
722 if (status == -NFS4ERR_BAD_SEQID)
723 nfs4_drop_state_owner(sp);
724}
725
726static int reclaimer(void *);
727struct reclaimer_args {
728 struct nfs4_client *clp;
729 struct completion complete;
730};
731
732/*
733 * State recovery routine
734 */
735void
736nfs4_recover_state(void *data)
737{
738 struct nfs4_client *clp = (struct nfs4_client *)data;
739 struct reclaimer_args args = {
740 .clp = clp,
741 };
742 might_sleep();
743
744 init_completion(&args.complete);
745
746 if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
747 goto out_failed_clear;
748 wait_for_completion(&args.complete);
749 return;
750out_failed_clear:
751 set_bit(NFS4CLNT_OK, &clp->cl_state);
752 wake_up_all(&clp->cl_waitq);
753 rpc_wake_up(&clp->cl_rpcwaitq);
754}
755
756/*
757 * Schedule a state recovery attempt
758 */
759void
760nfs4_schedule_state_recovery(struct nfs4_client *clp)
761{
762 if (!clp)
763 return;
764 if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
765 schedule_work(&clp->cl_recoverd);
766}
767
768static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
769{
770 struct inode *inode = state->inode;
771 struct file_lock *fl;
772 int status = 0;
773
774 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
775 if (!(fl->fl_flags & FL_POSIX))
776 continue;
777 if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
778 continue;
779 status = ops->recover_lock(state, fl);
780 if (status >= 0)
781 continue;
782 switch (status) {
783 default:
784 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
785 __FUNCTION__, status);
786 case -NFS4ERR_EXPIRED:
787 case -NFS4ERR_NO_GRACE:
788 case -NFS4ERR_RECLAIM_BAD:
789 case -NFS4ERR_RECLAIM_CONFLICT:
790 /* kill_proc(fl->fl_owner, SIGLOST, 1); */
791 break;
792 case -NFS4ERR_STALE_CLIENTID:
793 goto out_err;
794 }
795 }
796 return 0;
797out_err:
798 return status;
799}
800
801static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
802{
803 struct nfs4_state *state;
804 struct nfs4_lock_state *lock;
805 int status = 0;
806
807 /* Note: we rely on the sp->so_states list being ordered
808 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
809 * states first.
810 * This is needed to ensure that the server won't give us any
811 * read delegations that we have to return if, say, we are
812 * recovering after a network partition or a reboot from a
813 * server that doesn't support a grace period.
814 */
815 list_for_each_entry(state, &sp->so_states, open_states) {
816 if (state->state == 0)
817 continue;
818 status = ops->recover_open(sp, state);
819 list_for_each_entry(lock, &state->lock_states, ls_locks)
820 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
821 if (status >= 0) {
822 status = nfs4_reclaim_locks(ops, state);
823 if (status < 0)
824 goto out_err;
825 list_for_each_entry(lock, &state->lock_states, ls_locks) {
826 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
827 printk("%s: Lock reclaim failed!\n",
828 __FUNCTION__);
829 }
830 continue;
831 }
832 switch (status) {
833 default:
834 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
835 __FUNCTION__, status);
836 case -ENOENT:
837 case -NFS4ERR_RECLAIM_BAD:
838 case -NFS4ERR_RECLAIM_CONFLICT:
839 /*
840 * Open state on this file cannot be recovered
841 * All we can do is revert to using the zero stateid.
842 */
843 memset(state->stateid.data, 0,
844 sizeof(state->stateid.data));
845 /* Mark the file as being 'closed' */
846 state->state = 0;
847 break;
848 case -NFS4ERR_EXPIRED:
849 case -NFS4ERR_NO_GRACE:
850 case -NFS4ERR_STALE_CLIENTID:
851 goto out_err;
852 }
853 }
854 return 0;
855out_err:
856 return status;
857}
858
859static int reclaimer(void *ptr)
860{
861 struct reclaimer_args *args = (struct reclaimer_args *)ptr;
862 struct nfs4_client *clp = args->clp;
863 struct nfs4_state_owner *sp;
864 struct nfs4_state_recovery_ops *ops;
865 int status = 0;
866
867 daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
868 allow_signal(SIGKILL);
869
870 atomic_inc(&clp->cl_count);
871 complete(&args->complete);
872
873 /* Ensure exclusive access to NFSv4 state */
874 lock_kernel();
875 down_write(&clp->cl_sem);
876 /* Are there any NFS mounts out there? */
877 if (list_empty(&clp->cl_superblocks))
878 goto out;
879restart_loop:
880 status = nfs4_proc_renew(clp);
881 switch (status) {
882 case 0:
883 case -NFS4ERR_CB_PATH_DOWN:
884 goto out;
885 case -NFS4ERR_STALE_CLIENTID:
886 case -NFS4ERR_LEASE_MOVED:
887 ops = &nfs4_reboot_recovery_ops;
888 break;
889 default:
890 ops = &nfs4_network_partition_recovery_ops;
891 };
892 status = __nfs4_init_client(clp);
893 if (status)
894 goto out_error;
895 /* Mark all delegations for reclaim */
896 nfs_delegation_mark_reclaim(clp);
897 /* Note: list is protected by exclusive lock on cl->cl_sem */
898 list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
899 status = nfs4_reclaim_open_state(ops, sp);
900 if (status < 0) {
901 if (status == -NFS4ERR_NO_GRACE) {
902 ops = &nfs4_network_partition_recovery_ops;
903 status = nfs4_reclaim_open_state(ops, sp);
904 }
905 if (status == -NFS4ERR_STALE_CLIENTID)
906 goto restart_loop;
907 if (status == -NFS4ERR_EXPIRED)
908 goto restart_loop;
909 }
910 }
911 nfs_delegation_reap_unclaimed(clp);
912out:
913 set_bit(NFS4CLNT_OK, &clp->cl_state);
914 up_write(&clp->cl_sem);
915 unlock_kernel();
916 wake_up_all(&clp->cl_waitq);
917 rpc_wake_up(&clp->cl_rpcwaitq);
918 if (status == -NFS4ERR_CB_PATH_DOWN)
919 nfs_handle_cb_pathdown(clp);
920 nfs4_put_client(clp);
921 return 0;
922out_error:
923 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
924 NIPQUAD(clp->cl_addr.s_addr), -status);
925 goto out;
926}
927
928/*
929 * Local variables:
930 * c-basic-offset: 8
931 * End:
932 */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 000000000000..5f4de05763c9
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,4034 @@
1/*
2 * fs/nfs/nfs4xdr.c
3 *
4 * Client-side XDR for NFSv4.
5 *
6 * Copyright (c) 2002 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Kendrick Smith <kmsmith@umich.edu>
10 * Andy Adamson <andros@umich.edu>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <linux/param.h>
39#include <linux/time.h>
40#include <linux/mm.h>
41#include <linux/slab.h>
42#include <linux/utsname.h>
43#include <linux/errno.h>
44#include <linux/string.h>
45#include <linux/in.h>
46#include <linux/pagemap.h>
47#include <linux/proc_fs.h>
48#include <linux/kdev_t.h>
49#include <linux/sunrpc/clnt.h>
50#include <linux/nfs.h>
51#include <linux/nfs4.h>
52#include <linux/nfs_fs.h>
53#include <linux/nfs_idmap.h>
54
55#define NFSDBG_FACILITY NFSDBG_XDR
56
57/* Mapping from NFS error code to "errno" error code. */
58#define errno_NFSERR_IO EIO
59
60static int nfs_stat_to_errno(int);
61
62/* NFSv4 COMPOUND tags are only wanted for debugging purposes */
63#ifdef DEBUG
64#define NFS4_MAXTAGLEN 20
65#else
66#define NFS4_MAXTAGLEN 0
67#endif
68
69/* lock,open owner id:
70 * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2)
71 */
72#define owner_id_maxsz (1 + 1)
73#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
74#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
75#define op_encode_hdr_maxsz (1)
76#define op_decode_hdr_maxsz (2)
77#define encode_putfh_maxsz (op_encode_hdr_maxsz + 1 + \
78 (NFS4_FHSIZE >> 2))
79#define decode_putfh_maxsz (op_decode_hdr_maxsz)
80#define encode_putrootfh_maxsz (op_encode_hdr_maxsz)
81#define decode_putrootfh_maxsz (op_decode_hdr_maxsz)
82#define encode_getfh_maxsz (op_encode_hdr_maxsz)
83#define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \
84 ((3+NFS4_FHSIZE) >> 2))
85#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3)
86#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
87#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
88#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
89#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \
90 nfs4_fattr_bitmap_maxsz)
91#define encode_savefh_maxsz (op_encode_hdr_maxsz)
92#define decode_savefh_maxsz (op_decode_hdr_maxsz)
93#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2)
94#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11)
95#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
96#define decode_renew_maxsz (op_decode_hdr_maxsz)
97#define encode_setclientid_maxsz \
98 (op_encode_hdr_maxsz + \
99 4 /*server->ip_addr*/ + \
100 1 /*Netid*/ + \
101 6 /*uaddr*/ + \
102 6 + (NFS4_VERIFIER_SIZE >> 2))
103#define decode_setclientid_maxsz \
104 (op_decode_hdr_maxsz + \
105 2 + \
106 1024) /* large value for CLID_INUSE */
107#define encode_setclientid_confirm_maxsz \
108 (op_encode_hdr_maxsz + \
109 3 + (NFS4_VERIFIER_SIZE >> 2))
110#define decode_setclientid_confirm_maxsz \
111 (op_decode_hdr_maxsz)
/*
 * LOOKUP sends a component name (length word + padded name bytes), not a
 * filehandle, so the upper bound must be derived from the maximum name
 * length rather than from NFS4_FHSIZE.
 */
#define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
114#define encode_remove_maxsz (op_encode_hdr_maxsz + \
115 nfs4_name_maxsz)
116#define encode_rename_maxsz (op_encode_hdr_maxsz + \
117 2 * nfs4_name_maxsz)
118#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5)
119#define encode_link_maxsz (op_encode_hdr_maxsz + \
120 nfs4_name_maxsz)
121#define decode_link_maxsz (op_decode_hdr_maxsz + 5)
122#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
123 1 + nfs4_name_maxsz + \
124 nfs4_path_maxsz + \
125 nfs4_fattr_bitmap_maxsz)
126#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
127#define encode_create_maxsz (op_encode_hdr_maxsz + \
128 2 + nfs4_name_maxsz + \
129 nfs4_fattr_bitmap_maxsz)
130#define decode_create_maxsz (op_decode_hdr_maxsz + 8)
131#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
132#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
133#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
134#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
135#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
136 encode_putfh_maxsz + \
137 op_encode_hdr_maxsz + 7)
138#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \
139 decode_putfh_maxsz + \
140 op_decode_hdr_maxsz + 2)
141#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \
142 encode_putfh_maxsz + \
143 op_encode_hdr_maxsz)
144#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \
145 decode_putfh_maxsz + \
146 op_decode_hdr_maxsz)
147#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \
148 encode_putfh_maxsz + \
149 op_encode_hdr_maxsz + 9)
150#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \
151 decode_putfh_maxsz + \
152 op_decode_hdr_maxsz + 2)
153#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \
154 encode_putfh_maxsz + \
155 op_encode_hdr_maxsz + 8)
156#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \
157 decode_putfh_maxsz + \
158 op_decode_hdr_maxsz + 4)
159#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
160 encode_putfh_maxsz + \
161 op_encode_hdr_maxsz + 3)
162#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
163 decode_putfh_maxsz + \
164 op_decode_hdr_maxsz + 2)
165#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
166 encode_putfh_maxsz + \
167 op_encode_hdr_maxsz + \
168 13 + 3 + 2 + 64 + \
169 encode_getattr_maxsz + \
170 encode_getfh_maxsz)
171#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
172 decode_putfh_maxsz + \
173 op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
174 decode_getattr_maxsz + \
175 decode_getfh_maxsz)
176#define NFS4_enc_open_confirm_sz \
177 (compound_encode_hdr_maxsz + \
178 encode_putfh_maxsz + \
179 op_encode_hdr_maxsz + 5)
180#define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \
181 decode_putfh_maxsz + \
182 op_decode_hdr_maxsz + 4)
183#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
184 encode_putfh_maxsz + \
185 op_encode_hdr_maxsz + \
186 11)
187#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
188 decode_putfh_maxsz + \
189 op_decode_hdr_maxsz + \
190 4 + 5 + 2 + 3)
191#define NFS4_enc_open_downgrade_sz \
192 (compound_encode_hdr_maxsz + \
193 encode_putfh_maxsz + \
194 op_encode_hdr_maxsz + 7)
195#define NFS4_dec_open_downgrade_sz \
196 (compound_decode_hdr_maxsz + \
197 decode_putfh_maxsz + \
198 op_decode_hdr_maxsz + 4)
199#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \
200 encode_putfh_maxsz + \
201 op_encode_hdr_maxsz + 5)
202#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \
203 decode_putfh_maxsz + \
204 op_decode_hdr_maxsz + 4)
205#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
206 encode_putfh_maxsz + \
207 op_encode_hdr_maxsz + 4 + \
208 nfs4_fattr_bitmap_maxsz + \
209 encode_getattr_maxsz)
210#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \
211 decode_putfh_maxsz + \
212 op_decode_hdr_maxsz + 3)
213#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \
214 encode_putfh_maxsz + \
215 encode_fsinfo_maxsz)
216#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \
217 decode_putfh_maxsz + \
218 decode_fsinfo_maxsz)
219#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \
220 encode_renew_maxsz)
221#define NFS4_dec_renew_sz (compound_decode_hdr_maxsz + \
222 decode_renew_maxsz)
223#define NFS4_enc_setclientid_sz (compound_encode_hdr_maxsz + \
224 encode_setclientid_maxsz)
225#define NFS4_dec_setclientid_sz (compound_decode_hdr_maxsz + \
226 decode_setclientid_maxsz)
227#define NFS4_enc_setclientid_confirm_sz \
228 (compound_encode_hdr_maxsz + \
229 encode_setclientid_confirm_maxsz + \
230 encode_putrootfh_maxsz + \
231 encode_fsinfo_maxsz)
232#define NFS4_dec_setclientid_confirm_sz \
233 (compound_decode_hdr_maxsz + \
234 decode_setclientid_confirm_maxsz + \
235 decode_putrootfh_maxsz + \
236 decode_fsinfo_maxsz)
237#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \
238 encode_putfh_maxsz + \
239 encode_getattr_maxsz + \
240 op_encode_hdr_maxsz + \
241 1 + 1 + 2 + 2 + \
242 1 + 4 + 1 + 2 + \
243 owner_id_maxsz)
244#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
245 decode_putfh_maxsz + \
246 decode_getattr_maxsz + \
247 op_decode_hdr_maxsz + \
248 2 + 2 + 1 + 2 + \
249 owner_id_maxsz)
250#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
251 encode_putfh_maxsz + \
252 encode_getattr_maxsz + \
253 op_encode_hdr_maxsz + \
254 1 + 2 + 2 + 2 + \
255 owner_id_maxsz)
256#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
257#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
258 encode_putfh_maxsz + \
259 encode_getattr_maxsz + \
260 op_encode_hdr_maxsz + \
261 1 + 1 + 4 + 2 + 2)
262#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \
263 decode_putfh_maxsz + \
264 decode_getattr_maxsz + \
265 op_decode_hdr_maxsz + 4)
266#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
267 encode_putfh_maxsz + \
268 op_encode_hdr_maxsz + 1)
269#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
270 decode_putfh_maxsz + \
271 op_decode_hdr_maxsz + 2)
272#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
273 encode_putfh_maxsz + \
274 encode_getattr_maxsz)
275#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
276 decode_putfh_maxsz + \
277 decode_getattr_maxsz)
278#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
279 encode_putfh_maxsz + \
280 encode_lookup_maxsz + \
281 encode_getattr_maxsz + \
282 encode_getfh_maxsz)
283#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
284 decode_putfh_maxsz + \
285 op_decode_hdr_maxsz + \
286 decode_getattr_maxsz + \
287 decode_getfh_maxsz)
288#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
289 encode_putrootfh_maxsz + \
290 encode_getattr_maxsz + \
291 encode_getfh_maxsz)
292#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \
293 decode_putrootfh_maxsz + \
294 decode_getattr_maxsz + \
295 decode_getfh_maxsz)
296#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
297 encode_putfh_maxsz + \
298 encode_remove_maxsz)
299#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
300 decode_putfh_maxsz + \
301 op_decode_hdr_maxsz + 5)
302#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
303 encode_putfh_maxsz + \
304 encode_savefh_maxsz + \
305 encode_putfh_maxsz + \
306 encode_rename_maxsz)
307#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
308 decode_putfh_maxsz + \
309 decode_savefh_maxsz + \
310 decode_putfh_maxsz + \
311 decode_rename_maxsz)
312#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
313 encode_putfh_maxsz + \
314 encode_savefh_maxsz + \
315 encode_putfh_maxsz + \
316 encode_link_maxsz)
317#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
318 decode_putfh_maxsz + \
319 decode_savefh_maxsz + \
320 decode_putfh_maxsz + \
321 decode_link_maxsz)
322#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
323 encode_putfh_maxsz + \
324 encode_symlink_maxsz + \
325 encode_getattr_maxsz + \
326 encode_getfh_maxsz)
327#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \
328 decode_putfh_maxsz + \
329 decode_symlink_maxsz + \
330 decode_getattr_maxsz + \
331 decode_getfh_maxsz)
332#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
333 encode_putfh_maxsz + \
334 encode_create_maxsz + \
335 encode_getattr_maxsz + \
336 encode_getfh_maxsz)
337#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
338 decode_putfh_maxsz + \
339 decode_create_maxsz + \
340 decode_getattr_maxsz + \
341 decode_getfh_maxsz)
342#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
343 encode_putfh_maxsz + \
344 encode_getattr_maxsz)
345#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \
346 decode_putfh_maxsz + \
347 decode_getattr_maxsz)
348#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \
349 encode_putfh_maxsz + \
350 encode_getattr_maxsz)
351#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \
352 decode_putfh_maxsz + \
353 op_decode_hdr_maxsz + 12)
354#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
355 encode_getattr_maxsz)
356#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
357 decode_getattr_maxsz)
358#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \
359 encode_putfh_maxsz + \
360 encode_delegreturn_maxsz)
361#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
362 decode_delegreturn_maxsz)
363
/*
 * Table pairing local S_IF* mode bits with the matching NF* file type
 * constant. Rows that have no mode bits carry mode 0 and NFNON.
 * NOTE(review): presumably indexed by the NFSv4 on-the-wire file type
 * value -- confirm against the decode code that consumes this table.
 */
364static struct {
365 unsigned int mode;
366 unsigned int nfs2type;
367} nfs_type2fmt[] = {
368 { 0, NFNON },
369 { S_IFREG, NFREG },
370 { S_IFDIR, NFDIR },
371 { S_IFBLK, NFBLK },
372 { S_IFCHR, NFCHR },
373 { S_IFLNK, NFLNK },
374 { S_IFSOCK, NFSOCK },
375 { S_IFIFO, NFFIFO },
376 { 0, NFNON },
377 { 0, NFNON },
378};
379
/*
 * In-memory form of a COMPOUND header. encode_compound_hdr() sends taglen,
 * tag and nops (plus the minor version); it does not touch status, which
 * is presumably filled in when a reply is decoded -- TODO confirm.
 */
380struct compound_hdr {
381 int32_t status;
382 uint32_t nops;
383 uint32_t taglen;
384 char * tag;
385};
386
387/*
388 * START OF "GENERIC" ENCODE ROUTINES.
389 * These may look a little ugly since they are imported from a "generic"
390 * set of XDR encode/decode routines which are intended to be shared by
391 * all of our NFSv4 implementations (OpenBSD, MacOS X...).
392 *
393 * If the pain of reading these is too great, it should be a straightforward
394 * task to translate them into Linux-specific versions which are more
395 * consistent with the style used in NFSv2/v3...
396 */
/*
 * The WRITE* helpers all act on a local cursor named "p" that the calling
 * function must declare. WRITE32 stores one word in network byte order and
 * advances the cursor.
 */
397#define WRITE32(n) *p++ = htonl(n)
/* Store a 64-bit value as two network-order words, upper 32 bits first. */
398#define WRITE64(n) do { \
399 *p++ = htonl((uint32_t)((n) >> 32)); \
400 *p++ = htonl((uint32_t)(n)); \
401} while (0)
/* Emit nbytes from ptr via xdr_encode_opaque_fixed(); "p" takes its return value. */
402#define WRITEMEM(ptr,nbytes) do { \
403 p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
404} while (0)
405
/*
 * Point "p" at nbytes reserved in the caller's local "xdr" stream. On
 * failure the macro logs the requested size and the calling function's
 * name, then hits BUG_ON().
 */
406#define RESERVE_SPACE(nbytes) do { \
407 p = xdr_reserve_space(xdr, nbytes); \
408 if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
409 BUG_ON(!p); \
410} while (0)
411
412static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
413{
414 uint32_t *p;
415
416 p = xdr_reserve_space(xdr, 4 + len);
417 BUG_ON(p == NULL);
418 xdr_encode_opaque(p, str, len);
419}
420
421static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
422{
423 uint32_t *p;
424
425 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
426 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
427 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
428 WRITE32(hdr->taglen);
429 WRITEMEM(hdr->tag, hdr->taglen);
430 WRITE32(NFS4_MINOR_VERSION);
431 WRITE32(hdr->nops);
432 return 0;
433}
434
435static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
436{
437 uint32_t *p;
438
439 p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
440 BUG_ON(p == NULL);
441 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
442}
443
444static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
445{
446 char owner_name[IDMAP_NAMESZ];
447 char owner_group[IDMAP_NAMESZ];
448 int owner_namelen = 0;
449 int owner_grouplen = 0;
450 uint32_t *p;
451 uint32_t *q;
452 int len;
453 uint32_t bmval0 = 0;
454 uint32_t bmval1 = 0;
455 int status;
456
457 /*
458 * We reserve enough space to write the entire attribute buffer at once.
459 * In the worst-case, this would be
460 * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
461 * = 36 bytes, plus any contribution from variable-length fields
462 * such as owner/group/acl's.
463 */
464 len = 16;
465
466 /* Sigh */
467 if (iap->ia_valid & ATTR_SIZE)
468 len += 8;
469 if (iap->ia_valid & ATTR_MODE)
470 len += 4;
471 if (iap->ia_valid & ATTR_UID) {
472 owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
473 if (owner_namelen < 0) {
474 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
475 iap->ia_uid);
476 /* XXX */
477 strcpy(owner_name, "nobody");
478 owner_namelen = sizeof("nobody") - 1;
479 /* goto out; */
480 }
481 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
482 }
483 if (iap->ia_valid & ATTR_GID) {
484 owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
485 if (owner_grouplen < 0) {
486 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
487 iap->ia_gid);
488 strcpy(owner_group, "nobody");
489 owner_grouplen = sizeof("nobody") - 1;
490 /* goto out; */
491 }
492 len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
493 }
494 if (iap->ia_valid & ATTR_ATIME_SET)
495 len += 16;
496 else if (iap->ia_valid & ATTR_ATIME)
497 len += 4;
498 if (iap->ia_valid & ATTR_MTIME_SET)
499 len += 16;
500 else if (iap->ia_valid & ATTR_MTIME)
501 len += 4;
502 RESERVE_SPACE(len);
503
504 /*
505 * We write the bitmap length now, but leave the bitmap and the attribute
506 * buffer length to be backfilled at the end of this routine.
507 */
508 WRITE32(2);
509 q = p;
510 p += 3;
511
512 if (iap->ia_valid & ATTR_SIZE) {
513 bmval0 |= FATTR4_WORD0_SIZE;
514 WRITE64(iap->ia_size);
515 }
516 if (iap->ia_valid & ATTR_MODE) {
517 bmval1 |= FATTR4_WORD1_MODE;
518 WRITE32(iap->ia_mode);
519 }
520 if (iap->ia_valid & ATTR_UID) {
521 bmval1 |= FATTR4_WORD1_OWNER;
522 WRITE32(owner_namelen);
523 WRITEMEM(owner_name, owner_namelen);
524 }
525 if (iap->ia_valid & ATTR_GID) {
526 bmval1 |= FATTR4_WORD1_OWNER_GROUP;
527 WRITE32(owner_grouplen);
528 WRITEMEM(owner_group, owner_grouplen);
529 }
530 if (iap->ia_valid & ATTR_ATIME_SET) {
531 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
532 WRITE32(NFS4_SET_TO_CLIENT_TIME);
533 WRITE32(0);
534 WRITE32(iap->ia_mtime.tv_sec);
535 WRITE32(iap->ia_mtime.tv_nsec);
536 }
537 else if (iap->ia_valid & ATTR_ATIME) {
538 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
539 WRITE32(NFS4_SET_TO_SERVER_TIME);
540 }
541 if (iap->ia_valid & ATTR_MTIME_SET) {
542 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
543 WRITE32(NFS4_SET_TO_CLIENT_TIME);
544 WRITE32(0);
545 WRITE32(iap->ia_mtime.tv_sec);
546 WRITE32(iap->ia_mtime.tv_nsec);
547 }
548 else if (iap->ia_valid & ATTR_MTIME) {
549 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
550 WRITE32(NFS4_SET_TO_SERVER_TIME);
551 }
552
553 /*
554 * Now we backfill the bitmap and the attribute buffer length.
555 */
556 if (len != ((char *)p - (char *)q) + 4) {
557 printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
558 len, ((char *)p - (char *)q) + 4);
559 BUG();
560 }
561 len = (char *)p - (char *)q - 12;
562 *q++ = htonl(bmval0);
563 *q++ = htonl(bmval1);
564 *q++ = htonl(len);
565
566 status = 0;
567/* out: */
568 return status;
569}
570
571static int encode_access(struct xdr_stream *xdr, u32 access)
572{
573 uint32_t *p;
574
575 RESERVE_SPACE(8);
576 WRITE32(OP_ACCESS);
577 WRITE32(access);
578
579 return 0;
580}
581
582static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
583{
584 uint32_t *p;
585
586 RESERVE_SPACE(8+sizeof(arg->stateid.data));
587 WRITE32(OP_CLOSE);
588 WRITE32(arg->seqid);
589 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
590
591 return 0;
592}
593
594static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args)
595{
596 uint32_t *p;
597
598 RESERVE_SPACE(16);
599 WRITE32(OP_COMMIT);
600 WRITE64(args->offset);
601 WRITE32(args->count);
602
603 return 0;
604}
605
606static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create)
607{
608 uint32_t *p;
609
610 RESERVE_SPACE(8);
611 WRITE32(OP_CREATE);
612 WRITE32(create->ftype);
613
614 switch (create->ftype) {
615 case NF4LNK:
616 RESERVE_SPACE(4 + create->u.symlink->len);
617 WRITE32(create->u.symlink->len);
618 WRITEMEM(create->u.symlink->name, create->u.symlink->len);
619 break;
620
621 case NF4BLK: case NF4CHR:
622 RESERVE_SPACE(8);
623 WRITE32(create->u.device.specdata1);
624 WRITE32(create->u.device.specdata2);
625 break;
626
627 default:
628 break;
629 }
630
631 RESERVE_SPACE(4 + create->name->len);
632 WRITE32(create->name->len);
633 WRITEMEM(create->name->name, create->name->len);
634
635 return encode_attrs(xdr, create->attrs, create->server);
636}
637
638static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap)
639{
640 uint32_t *p;
641
642 RESERVE_SPACE(12);
643 WRITE32(OP_GETATTR);
644 WRITE32(1);
645 WRITE32(bitmap);
646 return 0;
647}
648
649static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1)
650{
651 uint32_t *p;
652
653 RESERVE_SPACE(16);
654 WRITE32(OP_GETATTR);
655 WRITE32(2);
656 WRITE32(bm0);
657 WRITE32(bm1);
658 return 0;
659}
660
661static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
662{
663 extern u32 nfs4_fattr_bitmap[];
664
665 return encode_getattr_two(xdr,
666 bitmask[0] & nfs4_fattr_bitmap[0],
667 bitmask[1] & nfs4_fattr_bitmap[1]);
668}
669
670static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
671{
672 extern u32 nfs4_fsinfo_bitmap[];
673
674 return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
675 bitmask[1] & nfs4_fsinfo_bitmap[1]);
676}
677
678static int encode_getfh(struct xdr_stream *xdr)
679{
680 uint32_t *p;
681
682 RESERVE_SPACE(4);
683 WRITE32(OP_GETFH);
684
685 return 0;
686}
687
688static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
689{
690 uint32_t *p;
691
692 RESERVE_SPACE(8 + name->len);
693 WRITE32(OP_LINK);
694 WRITE32(name->len);
695 WRITEMEM(name->name, name->len);
696
697 return 0;
698}
699
700/*
701 * opcode,type,reclaim,offset,length,new_lock_owner = 32
702 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
703 */
704static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
705{
706 uint32_t *p;
707 struct nfs_lock_opargs *opargs = arg->u.lock;
708
709 RESERVE_SPACE(32);
710 WRITE32(OP_LOCK);
711 WRITE32(arg->type);
712 WRITE32(opargs->reclaim);
713 WRITE64(arg->offset);
714 WRITE64(arg->length);
715 WRITE32(opargs->new_lock_owner);
716 if (opargs->new_lock_owner){
717 struct nfs_open_to_lock *ol = opargs->u.open_lock;
718
719 RESERVE_SPACE(40);
720 WRITE32(ol->open_seqid);
721 WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
722 WRITE32(ol->lock_seqid);
723 WRITE64(ol->lock_owner.clientid);
724 WRITE32(4);
725 WRITE32(ol->lock_owner.id);
726 }
727 else {
728 struct nfs_exist_lock *el = opargs->u.exist_lock;
729
730 RESERVE_SPACE(20);
731 WRITEMEM(&el->stateid, sizeof(el->stateid));
732 WRITE32(el->seqid);
733 }
734
735 return 0;
736}
737
738static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
739{
740 uint32_t *p;
741 struct nfs_lowner *opargs = arg->u.lockt;
742
743 RESERVE_SPACE(40);
744 WRITE32(OP_LOCKT);
745 WRITE32(arg->type);
746 WRITE64(arg->offset);
747 WRITE64(arg->length);
748 WRITE64(opargs->clientid);
749 WRITE32(4);
750 WRITE32(opargs->id);
751
752 return 0;
753}
754
755static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
756{
757 uint32_t *p;
758 struct nfs_locku_opargs *opargs = arg->u.locku;
759
760 RESERVE_SPACE(44);
761 WRITE32(OP_LOCKU);
762 WRITE32(arg->type);
763 WRITE32(opargs->seqid);
764 WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
765 WRITE64(arg->offset);
766 WRITE64(arg->length);
767
768 return 0;
769}
770
771static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
772{
773 int len = name->len;
774 uint32_t *p;
775
776 RESERVE_SPACE(8 + len);
777 WRITE32(OP_LOOKUP);
778 WRITE32(len);
779 WRITEMEM(name->name, len);
780
781 return 0;
782}
783
784static void encode_share_access(struct xdr_stream *xdr, int open_flags)
785{
786 uint32_t *p;
787
788 RESERVE_SPACE(8);
789 switch (open_flags & (FMODE_READ|FMODE_WRITE)) {
790 case FMODE_READ:
791 WRITE32(NFS4_SHARE_ACCESS_READ);
792 break;
793 case FMODE_WRITE:
794 WRITE32(NFS4_SHARE_ACCESS_WRITE);
795 break;
796 case FMODE_READ|FMODE_WRITE:
797 WRITE32(NFS4_SHARE_ACCESS_BOTH);
798 break;
799 default:
800 BUG();
801 }
802 WRITE32(0); /* for linux, share_deny = 0 always */
803}
804
805static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
806{
807 uint32_t *p;
808 /*
809 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
810 * owner 4 = 32
811 */
812 RESERVE_SPACE(8);
813 WRITE32(OP_OPEN);
814 WRITE32(arg->seqid);
815 encode_share_access(xdr, arg->open_flags);
816 RESERVE_SPACE(16);
817 WRITE64(arg->clientid);
818 WRITE32(4);
819 WRITE32(arg->id);
820}
821
822static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
823{
824 uint32_t *p;
825
826 RESERVE_SPACE(4);
827 switch(arg->open_flags & O_EXCL) {
828 case 0:
829 WRITE32(NFS4_CREATE_UNCHECKED);
830 encode_attrs(xdr, arg->u.attrs, arg->server);
831 break;
832 default:
833 WRITE32(NFS4_CREATE_EXCLUSIVE);
834 encode_nfs4_verifier(xdr, &arg->u.verifier);
835 }
836}
837
838static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg)
839{
840 uint32_t *p;
841
842 RESERVE_SPACE(4);
843 switch (arg->open_flags & O_CREAT) {
844 case 0:
845 WRITE32(NFS4_OPEN_NOCREATE);
846 break;
847 default:
848 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
849 WRITE32(NFS4_OPEN_CREATE);
850 encode_createmode(xdr, arg);
851 }
852}
853
854static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
855{
856 uint32_t *p;
857
858 RESERVE_SPACE(4);
859 switch (delegation_type) {
860 case 0:
861 WRITE32(NFS4_OPEN_DELEGATE_NONE);
862 break;
863 case FMODE_READ:
864 WRITE32(NFS4_OPEN_DELEGATE_READ);
865 break;
866 case FMODE_WRITE|FMODE_READ:
867 WRITE32(NFS4_OPEN_DELEGATE_WRITE);
868 break;
869 default:
870 BUG();
871 }
872}
873
874static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name)
875{
876 uint32_t *p;
877
878 RESERVE_SPACE(4);
879 WRITE32(NFS4_OPEN_CLAIM_NULL);
880 encode_string(xdr, name->len, name->name);
881}
882
883static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
884{
885 uint32_t *p;
886
887 RESERVE_SPACE(4);
888 WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
889 encode_delegation_type(xdr, type);
890}
891
892static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid)
893{
894 uint32_t *p;
895
896 RESERVE_SPACE(4+sizeof(stateid->data));
897 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
898 WRITEMEM(stateid->data, sizeof(stateid->data));
899 encode_string(xdr, name->len, name->name);
900}
901
902static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg)
903{
904 encode_openhdr(xdr, arg);
905 encode_opentype(xdr, arg);
906 switch (arg->claim) {
907 case NFS4_OPEN_CLAIM_NULL:
908 encode_claim_null(xdr, arg->name);
909 break;
910 case NFS4_OPEN_CLAIM_PREVIOUS:
911 encode_claim_previous(xdr, arg->u.delegation_type);
912 break;
913 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
914 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
915 break;
916 default:
917 BUG();
918 }
919 return 0;
920}
921
922static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg)
923{
924 uint32_t *p;
925
926 RESERVE_SPACE(8+sizeof(arg->stateid.data));
927 WRITE32(OP_OPEN_CONFIRM);
928 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
929 WRITE32(arg->seqid);
930
931 return 0;
932}
933
934static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
935{
936 uint32_t *p;
937
938 RESERVE_SPACE(8+sizeof(arg->stateid.data));
939 WRITE32(OP_OPEN_DOWNGRADE);
940 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
941 WRITE32(arg->seqid);
942 encode_share_access(xdr, arg->open_flags);
943 return 0;
944}
945
946static int
947encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
948{
949 int len = fh->size;
950 uint32_t *p;
951
952 RESERVE_SPACE(8 + len);
953 WRITE32(OP_PUTFH);
954 WRITE32(len);
955 WRITEMEM(fh->data, len);
956
957 return 0;
958}
959
960static int encode_putrootfh(struct xdr_stream *xdr)
961{
962 uint32_t *p;
963
964 RESERVE_SPACE(4);
965 WRITE32(OP_PUTROOTFH);
966
967 return 0;
968}
969
970static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
971{
972 extern nfs4_stateid zero_stateid;
973 nfs4_stateid stateid;
974 uint32_t *p;
975
976 RESERVE_SPACE(16);
977 if (ctx->state != NULL) {
978 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
979 WRITEMEM(stateid.data, sizeof(stateid.data));
980 } else
981 WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
982}
983
984static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
985{
986 uint32_t *p;
987
988 RESERVE_SPACE(4);
989 WRITE32(OP_READ);
990
991 encode_stateid(xdr, args->context);
992
993 RESERVE_SPACE(12);
994 WRITE64(args->offset);
995 WRITE32(args->count);
996
997 return 0;
998}
999
1000static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
1001{
1002 struct rpc_auth *auth = req->rq_task->tk_auth;
1003 int replen;
1004 uint32_t *p;
1005
1006 RESERVE_SPACE(32+sizeof(nfs4_verifier));
1007 WRITE32(OP_READDIR);
1008 WRITE64(readdir->cookie);
1009 WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data));
1010 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */
1011 WRITE32(readdir->count);
1012 WRITE32(2);
1013 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
1014 WRITE32(0);
1015 WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID);
1016 } else {
1017 WRITE32(FATTR4_WORD0_FILEID);
1018 WRITE32(0);
1019 }
1020
1021 /* set up reply kvec
1022 * toplevel_status + taglen + rescount + OP_PUTFH + status
1023 * + OP_READDIR + status + verifer(2) = 9
1024 */
1025 replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
1026 xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
1027 readdir->pgbase, readdir->count);
1028
1029 return 0;
1030}
1031
1032static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
1033{
1034 struct rpc_auth *auth = req->rq_task->tk_auth;
1035 unsigned int replen;
1036 uint32_t *p;
1037
1038 RESERVE_SPACE(4);
1039 WRITE32(OP_READLINK);
1040
1041 /* set up reply kvec
1042 * toplevel_status + taglen + rescount + OP_PUTFH + status
1043 * + OP_READLINK + status + string length = 8
1044 */
1045 replen = (RPC_REPHDRSIZE + auth->au_rslack + 8) << 2;
1046 xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages,
1047 readlink->pgbase, readlink->pglen);
1048
1049 return 0;
1050}
1051
1052static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
1053{
1054 uint32_t *p;
1055
1056 RESERVE_SPACE(8 + name->len);
1057 WRITE32(OP_REMOVE);
1058 WRITE32(name->len);
1059 WRITEMEM(name->name, name->len);
1060
1061 return 0;
1062}
1063
1064static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname)
1065{
1066 uint32_t *p;
1067
1068 RESERVE_SPACE(8 + oldname->len);
1069 WRITE32(OP_RENAME);
1070 WRITE32(oldname->len);
1071 WRITEMEM(oldname->name, oldname->len);
1072
1073 RESERVE_SPACE(4 + newname->len);
1074 WRITE32(newname->len);
1075 WRITEMEM(newname->name, newname->len);
1076
1077 return 0;
1078}
1079
1080static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
1081{
1082 uint32_t *p;
1083
1084 RESERVE_SPACE(12);
1085 WRITE32(OP_RENEW);
1086 WRITE64(client_stateid->cl_clientid);
1087
1088 return 0;
1089}
1090
1091static int
1092encode_savefh(struct xdr_stream *xdr)
1093{
1094 uint32_t *p;
1095
1096 RESERVE_SPACE(4);
1097 WRITE32(OP_SAVEFH);
1098
1099 return 0;
1100}
1101
1102static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server)
1103{
1104 int status;
1105 uint32_t *p;
1106
1107 RESERVE_SPACE(4+sizeof(arg->stateid.data));
1108 WRITE32(OP_SETATTR);
1109 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
1110
1111 if ((status = encode_attrs(xdr, arg->iap, server)))
1112 return status;
1113
1114 return 0;
1115}
1116
1117static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid)
1118{
1119 uint32_t *p;
1120
1121 RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data));
1122 WRITE32(OP_SETCLIENTID);
1123 WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data));
1124
1125 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1126 RESERVE_SPACE(4);
1127 WRITE32(setclientid->sc_prog);
1128 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
1129 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1130 RESERVE_SPACE(4);
1131 WRITE32(setclientid->sc_cb_ident);
1132
1133 return 0;
1134}
1135
1136static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
1137{
1138 uint32_t *p;
1139
1140 RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data));
1141 WRITE32(OP_SETCLIENTID_CONFIRM);
1142 WRITE64(client_state->cl_clientid);
1143 WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data));
1144
1145 return 0;
1146}
1147
1148static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args)
1149{
1150 uint32_t *p;
1151
1152 RESERVE_SPACE(4);
1153 WRITE32(OP_WRITE);
1154
1155 encode_stateid(xdr, args->context);
1156
1157 RESERVE_SPACE(16);
1158 WRITE64(args->offset);
1159 WRITE32(args->stable);
1160 WRITE32(args->count);
1161
1162 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1163
1164 return 0;
1165}
1166
1167static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid)
1168{
1169 uint32_t *p;
1170
1171 RESERVE_SPACE(20);
1172
1173 WRITE32(OP_DELEGRETURN);
1174 WRITEMEM(stateid->data, sizeof(stateid->data));
1175 return 0;
1176
1177}
1178/*
1179 * END OF "GENERIC" ENCODE ROUTINES.
1180 */
1181
1182/*
1183 * Encode an ACCESS request
1184 */
1185static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct nfs4_accessargs *args)
1186{
1187 struct xdr_stream xdr;
1188 struct compound_hdr hdr = {
1189 .nops = 2,
1190 };
1191 int status;
1192
1193 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1194 encode_compound_hdr(&xdr, &hdr);
1195 if ((status = encode_putfh(&xdr, args->fh)) == 0)
1196 status = encode_access(&xdr, args->access);
1197 return status;
1198}
1199
1200/*
1201 * Encode LOOKUP request
1202 */
1203static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_arg *args)
1204{
1205 struct xdr_stream xdr;
1206 struct compound_hdr hdr = {
1207 .nops = 4,
1208 };
1209 int status;
1210
1211 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1212 encode_compound_hdr(&xdr, &hdr);
1213 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
1214 goto out;
1215 if ((status = encode_lookup(&xdr, args->name)) != 0)
1216 goto out;
1217 if ((status = encode_getfh(&xdr)) != 0)
1218 goto out;
1219 status = encode_getfattr(&xdr, args->bitmask);
1220out:
1221 return status;
1222}
1223
1224/*
1225 * Encode LOOKUP_ROOT request
1226 */
1227static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_root_arg *args)
1228{
1229 struct xdr_stream xdr;
1230 struct compound_hdr hdr = {
1231 .nops = 3,
1232 };
1233 int status;
1234
1235 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1236 encode_compound_hdr(&xdr, &hdr);
1237 if ((status = encode_putrootfh(&xdr)) != 0)
1238 goto out;
1239 if ((status = encode_getfh(&xdr)) == 0)
1240 status = encode_getfattr(&xdr, args->bitmask);
1241out:
1242 return status;
1243}
1244
1245/*
1246 * Encode REMOVE request
1247 */
1248static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct nfs4_remove_arg *args)
1249{
1250 struct xdr_stream xdr;
1251 struct compound_hdr hdr = {
1252 .nops = 2,
1253 };
1254 int status;
1255
1256 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1257 encode_compound_hdr(&xdr, &hdr);
1258 if ((status = encode_putfh(&xdr, args->fh)) == 0)
1259 status = encode_remove(&xdr, args->name);
1260 return status;
1261}
1262
1263/*
1264 * Encode RENAME request
1265 */
1266static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct nfs4_rename_arg *args)
1267{
1268 struct xdr_stream xdr;
1269 struct compound_hdr hdr = {
1270 .nops = 4,
1271 };
1272 int status;
1273
1274 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1275 encode_compound_hdr(&xdr, &hdr);
1276 if ((status = encode_putfh(&xdr, args->old_dir)) != 0)
1277 goto out;
1278 if ((status = encode_savefh(&xdr)) != 0)
1279 goto out;
1280 if ((status = encode_putfh(&xdr, args->new_dir)) != 0)
1281 goto out;
1282 status = encode_rename(&xdr, args->old_name, args->new_name);
1283out:
1284 return status;
1285}
1286
1287/*
1288 * Encode LINK request
1289 */
1290static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs4_link_arg *args)
1291{
1292 struct xdr_stream xdr;
1293 struct compound_hdr hdr = {
1294 .nops = 4,
1295 };
1296 int status;
1297
1298 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1299 encode_compound_hdr(&xdr, &hdr);
1300 if ((status = encode_putfh(&xdr, args->fh)) != 0)
1301 goto out;
1302 if ((status = encode_savefh(&xdr)) != 0)
1303 goto out;
1304 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
1305 goto out;
1306 status = encode_link(&xdr, args->name);
1307out:
1308 return status;
1309}
1310
1311/*
1312 * Encode CREATE request
1313 */
1314static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
1315{
1316 struct xdr_stream xdr;
1317 struct compound_hdr hdr = {
1318 .nops = 4,
1319 };
1320 int status;
1321
1322 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1323 encode_compound_hdr(&xdr, &hdr);
1324 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
1325 goto out;
1326 if ((status = encode_create(&xdr, args)) != 0)
1327 goto out;
1328 if ((status = encode_getfh(&xdr)) != 0)
1329 goto out;
1330 status = encode_getfattr(&xdr, args->bitmask);
1331out:
1332 return status;
1333}
1334
1335/*
1336 * Encode SYMLINK request
1337 */
1338static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
1339{
1340 return nfs4_xdr_enc_create(req, p, args);
1341}
1342
1343/*
1344 * Encode GETATTR request
1345 */
1346static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct nfs4_getattr_arg *args)
1347{
1348 struct xdr_stream xdr;
1349 struct compound_hdr hdr = {
1350 .nops = 2,
1351 };
1352 int status;
1353
1354 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1355 encode_compound_hdr(&xdr, &hdr);
1356 if ((status = encode_putfh(&xdr, args->fh)) == 0)
1357 status = encode_getfattr(&xdr, args->bitmask);
1358 return status;
1359}
1360
1361/*
1362 * Encode a CLOSE request
1363 */
1364static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
1365{
1366 struct xdr_stream xdr;
1367 struct compound_hdr hdr = {
1368 .nops = 2,
1369 };
1370 int status;
1371
1372 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1373 encode_compound_hdr(&xdr, &hdr);
1374 status = encode_putfh(&xdr, args->fh);
1375 if(status)
1376 goto out;
1377 status = encode_close(&xdr, args);
1378out:
1379 return status;
1380}
1381
1382/*
1383 * Encode an OPEN request
1384 */
1385static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
1386{
1387 struct xdr_stream xdr;
1388 struct compound_hdr hdr = {
1389 .nops = 4,
1390 };
1391 int status;
1392
1393 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1394 encode_compound_hdr(&xdr, &hdr);
1395 status = encode_putfh(&xdr, args->fh);
1396 if (status)
1397 goto out;
1398 status = encode_open(&xdr, args);
1399 if (status)
1400 goto out;
1401 status = encode_getfh(&xdr);
1402 if (status)
1403 goto out;
1404 status = encode_getfattr(&xdr, args->bitmask);
1405out:
1406 return status;
1407}
1408
1409/*
1410 * Encode an OPEN_CONFIRM request
1411 */
1412static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
1413{
1414 struct xdr_stream xdr;
1415 struct compound_hdr hdr = {
1416 .nops = 2,
1417 };
1418 int status;
1419
1420 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1421 encode_compound_hdr(&xdr, &hdr);
1422 status = encode_putfh(&xdr, args->fh);
1423 if(status)
1424 goto out;
1425 status = encode_open_confirm(&xdr, args);
1426out:
1427 return status;
1428}
1429
1430/*
1431 * Encode an OPEN request with no attributes.
1432 */
1433static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
1434{
1435 struct xdr_stream xdr;
1436 struct compound_hdr hdr = {
1437 .nops = 2,
1438 };
1439 int status;
1440
1441 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1442 encode_compound_hdr(&xdr, &hdr);
1443 status = encode_putfh(&xdr, args->fh);
1444 if (status)
1445 goto out;
1446 status = encode_open(&xdr, args);
1447out:
1448 return status;
1449}
1450
1451/*
1452 * Encode an OPEN_DOWNGRADE request
1453 */
1454static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
1455{
1456 struct xdr_stream xdr;
1457 struct compound_hdr hdr = {
1458 .nops = 2,
1459 };
1460 int status;
1461
1462 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1463 encode_compound_hdr(&xdr, &hdr);
1464 status = encode_putfh(&xdr, args->fh);
1465 if (status)
1466 goto out;
1467 status = encode_open_downgrade(&xdr, args);
1468out:
1469 return status;
1470}
1471
1472/*
1473 * Encode a LOCK request
1474 */
1475static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
1476{
1477 struct xdr_stream xdr;
1478 struct compound_hdr hdr = {
1479 .nops = 2,
1480 };
1481 int status;
1482
1483 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1484 encode_compound_hdr(&xdr, &hdr);
1485 status = encode_putfh(&xdr, args->fh);
1486 if(status)
1487 goto out;
1488 status = encode_lock(&xdr, args);
1489out:
1490 return status;
1491}
1492
1493/*
1494 * Encode a LOCKT request
1495 */
1496static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
1497{
1498 struct xdr_stream xdr;
1499 struct compound_hdr hdr = {
1500 .nops = 2,
1501 };
1502 int status;
1503
1504 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1505 encode_compound_hdr(&xdr, &hdr);
1506 status = encode_putfh(&xdr, args->fh);
1507 if(status)
1508 goto out;
1509 status = encode_lockt(&xdr, args);
1510out:
1511 return status;
1512}
1513
1514/*
1515 * Encode a LOCKU request
1516 */
1517static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
1518{
1519 struct xdr_stream xdr;
1520 struct compound_hdr hdr = {
1521 .nops = 2,
1522 };
1523 int status;
1524
1525 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1526 encode_compound_hdr(&xdr, &hdr);
1527 status = encode_putfh(&xdr, args->fh);
1528 if(status)
1529 goto out;
1530 status = encode_locku(&xdr, args);
1531out:
1532 return status;
1533}
1534
1535/*
1536 * Encode a READLINK request
1537 */
1538static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readlink *args)
1539{
1540 struct xdr_stream xdr;
1541 struct compound_hdr hdr = {
1542 .nops = 2,
1543 };
1544 int status;
1545
1546 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1547 encode_compound_hdr(&xdr, &hdr);
1548 status = encode_putfh(&xdr, args->fh);
1549 if(status)
1550 goto out;
1551 status = encode_readlink(&xdr, args, req);
1552out:
1553 return status;
1554}
1555
1556/*
1557 * Encode a READDIR request
1558 */
1559static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readdir_arg *args)
1560{
1561 struct xdr_stream xdr;
1562 struct compound_hdr hdr = {
1563 .nops = 2,
1564 };
1565 int status;
1566
1567 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1568 encode_compound_hdr(&xdr, &hdr);
1569 status = encode_putfh(&xdr, args->fh);
1570 if(status)
1571 goto out;
1572 status = encode_readdir(&xdr, args, req);
1573out:
1574 return status;
1575}
1576
1577/*
1578 * Encode a READ request
1579 */
1580static int nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
1581{
1582 struct rpc_auth *auth = req->rq_task->tk_auth;
1583 struct xdr_stream xdr;
1584 struct compound_hdr hdr = {
1585 .nops = 2,
1586 };
1587 int replen, status;
1588
1589 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1590 encode_compound_hdr(&xdr, &hdr);
1591 status = encode_putfh(&xdr, args->fh);
1592 if (status)
1593 goto out;
1594 status = encode_read(&xdr, args);
1595 if (status)
1596 goto out;
1597
1598 /* set up reply kvec
1599 * toplevel status + taglen=0 + rescount + OP_PUTFH + status
1600 * + OP_READ + status + eof + datalen = 9
1601 */
1602 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
1603 xdr_inline_pages(&req->rq_rcv_buf, replen,
1604 args->pages, args->pgbase, args->count);
1605out:
1606 return status;
1607}
1608
1609/*
1610 * Encode an SETATTR request
1611 */
1612static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, uint32_t *p, struct nfs_setattrargs *args)
1613
1614{
1615 struct xdr_stream xdr;
1616 struct compound_hdr hdr = {
1617 .nops = 3,
1618 };
1619 int status;
1620
1621 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1622 encode_compound_hdr(&xdr, &hdr);
1623 status = encode_putfh(&xdr, args->fh);
1624 if(status)
1625 goto out;
1626 status = encode_setattr(&xdr, args, args->server);
1627 if(status)
1628 goto out;
1629 status = encode_getfattr(&xdr, args->bitmask);
1630out:
1631 return status;
1632}
1633
1634/*
1635 * Encode a WRITE request
1636 */
1637static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
1638{
1639 struct xdr_stream xdr;
1640 struct compound_hdr hdr = {
1641 .nops = 2,
1642 };
1643 int status;
1644
1645 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1646 encode_compound_hdr(&xdr, &hdr);
1647 status = encode_putfh(&xdr, args->fh);
1648 if (status)
1649 goto out;
1650 status = encode_write(&xdr, args);
1651out:
1652 return status;
1653}
1654
1655/*
1656 * a COMMIT request
1657 */
1658static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
1659{
1660 struct xdr_stream xdr;
1661 struct compound_hdr hdr = {
1662 .nops = 2,
1663 };
1664 int status;
1665
1666 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1667 encode_compound_hdr(&xdr, &hdr);
1668 status = encode_putfh(&xdr, args->fh);
1669 if (status)
1670 goto out;
1671 status = encode_commit(&xdr, args);
1672out:
1673 return status;
1674}
1675
1676/*
1677 * FSINFO request
1678 */
1679static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fsinfo_arg *args)
1680{
1681 struct xdr_stream xdr;
1682 struct compound_hdr hdr = {
1683 .nops = 2,
1684 };
1685 int status;
1686
1687 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1688 encode_compound_hdr(&xdr, &hdr);
1689 status = encode_putfh(&xdr, args->fh);
1690 if (!status)
1691 status = encode_fsinfo(&xdr, args->bitmask);
1692 return status;
1693}
1694
1695/*
1696 * a PATHCONF request
1697 */
1698static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
1699{
1700 extern u32 nfs4_pathconf_bitmap[2];
1701 struct xdr_stream xdr;
1702 struct compound_hdr hdr = {
1703 .nops = 2,
1704 };
1705 int status;
1706
1707 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1708 encode_compound_hdr(&xdr, &hdr);
1709 status = encode_putfh(&xdr, args->fh);
1710 if (!status)
1711 status = encode_getattr_one(&xdr,
1712 args->bitmask[0] & nfs4_pathconf_bitmap[0]);
1713 return status;
1714}
1715
1716/*
1717 * a STATFS request
1718 */
1719static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
1720{
1721 extern u32 nfs4_statfs_bitmap[];
1722 struct xdr_stream xdr;
1723 struct compound_hdr hdr = {
1724 .nops = 2,
1725 };
1726 int status;
1727
1728 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1729 encode_compound_hdr(&xdr, &hdr);
1730 status = encode_putfh(&xdr, args->fh);
1731 if (status == 0)
1732 status = encode_getattr_two(&xdr,
1733 args->bitmask[0] & nfs4_statfs_bitmap[0],
1734 args->bitmask[1] & nfs4_statfs_bitmap[1]);
1735 return status;
1736}
1737
1738/*
1739 * GETATTR_BITMAP request
1740 */
1741static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const struct nfs_fh *fhandle)
1742{
1743 struct xdr_stream xdr;
1744 struct compound_hdr hdr = {
1745 .nops = 2,
1746 };
1747 int status;
1748
1749 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1750 encode_compound_hdr(&xdr, &hdr);
1751 status = encode_putfh(&xdr, fhandle);
1752 if (status == 0)
1753 status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
1754 FATTR4_WORD0_LINK_SUPPORT|
1755 FATTR4_WORD0_SYMLINK_SUPPORT|
1756 FATTR4_WORD0_ACLSUPPORT);
1757 return status;
1758}
1759
1760/*
1761 * a RENEW request
1762 */
1763static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
1764{
1765 struct xdr_stream xdr;
1766 struct compound_hdr hdr = {
1767 .nops = 1,
1768 };
1769
1770 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1771 encode_compound_hdr(&xdr, &hdr);
1772 return encode_renew(&xdr, clp);
1773}
1774
1775/*
1776 * a SETCLIENTID request
1777 */
1778static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nfs4_setclientid *sc)
1779{
1780 struct xdr_stream xdr;
1781 struct compound_hdr hdr = {
1782 .nops = 1,
1783 };
1784
1785 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1786 encode_compound_hdr(&xdr, &hdr);
1787 return encode_setclientid(&xdr, sc);
1788}
1789
1790/*
1791 * a SETCLIENTID_CONFIRM request
1792 */
1793static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
1794{
1795 struct xdr_stream xdr;
1796 struct compound_hdr hdr = {
1797 .nops = 3,
1798 };
1799 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
1800 int status;
1801
1802 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1803 encode_compound_hdr(&xdr, &hdr);
1804 status = encode_setclientid_confirm(&xdr, clp);
1805 if (!status)
1806 status = encode_putrootfh(&xdr);
1807 if (!status)
1808 status = encode_fsinfo(&xdr, lease_bitmap);
1809 return status;
1810}
1811
1812/*
1813 * DELEGRETURN request
1814 */
1815static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args)
1816{
1817 struct xdr_stream xdr;
1818 struct compound_hdr hdr = {
1819 .nops = 2,
1820 };
1821 int status;
1822
1823 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1824 encode_compound_hdr(&xdr, &hdr);
1825 if ((status = encode_putfh(&xdr, args->fhandle)) == 0)
1826 status = encode_delegreturn(&xdr, args->stateid);
1827 return status;
1828}
1829
1830/*
1831 * START OF "GENERIC" DECODE ROUTINES.
1832 * These may look a little ugly since they are imported from a "generic"
1833 * set of XDR encode/decode routines which are intended to be shared by
1834 * all of our NFSv4 implementations (OpenBSD, MacOS X...).
1835 *
1836 * If the pain of reading these is too great, it should be a straightforward
1837 * task to translate them into Linux-specific versions which are more
1838 * consistent with the style used in NFSv2/v3...
1839 */
/*
 * Decode helper macros.
 *
 * All of them operate on a local cursor named "p" (uint32_t *) that the
 * enclosing function must declare.  READ_BUF additionally needs a local
 * "xdr" stream pointer and may only be used in functions returning int,
 * because it returns -EIO from the enclosing function on failure.
 */

/* Load one network-order 32-bit word from p into x and advance p. */
#define READ32(x)         (x) = ntohl(*p++)

/* Load two words from p into x as a 64-bit value, high word first. */
#define READ64(x)         do {			\
	(x) = (u64)ntohl(*p++) << 32;		\
	(x) |= ntohl(*p++);			\
} while (0)

/*
 * Load a time value into x: the first word is skipped, the second is
 * stored in tv_sec and the third in tv_nsec.
 * NOTE(review): the skipped word is presumably the upper half of a
 * 64-bit seconds field - confirm against the protocol definition.
 */
#define READTIME(x)       do {			\
	p++;					\
	(x).tv_sec = ntohl(*p++);		\
	(x).tv_nsec = ntohl(*p++);		\
} while (0)

/* Copy nbytes from p to x, then advance p by XDR_QUADLEN(nbytes) words. */
#define COPYMEM(x,nbytes) do {			\
	memcpy((x), p, (nbytes));		\
	p += XDR_QUADLEN(nbytes);		\
} while (0)

/*
 * Point p at the next nbytes of the reply via xdr_inline_decode().
 * If that fails, log a warning (newline-terminated so it does not run
 * into the next log line) and return -EIO from the calling function.
 */
#define READ_BUF(nbytes)  do { \
	p = xdr_inline_decode(xdr, (nbytes)); \
	if (!p) { \
		printk(KERN_WARNING "%s: reply buffer overflowed in line %d.\n", \
				__FUNCTION__, __LINE__); \
		return -EIO; \
	} \
} while (0)
1863
/*
 * Decode a counted opaque without copying it.
 *
 * The 32-bit length is stored in *len and *string is pointed at the
 * data inside the reply buffer.  Returns 0 on success; READ_BUF makes
 * the function return -EIO if the buffer is too short for either the
 * length word or the data.
 */
static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string)
{
	uint32_t *p;

	/* length word */
	READ_BUF(4);
	*len = ntohl(*p);

	/* the data itself; p now points at its first byte */
	READ_BUF(*len);
	*string = (char *)p;
	return 0;
}
1874
1875static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
1876{
1877 uint32_t *p;
1878
1879 READ_BUF(8);
1880 READ32(hdr->status);
1881 READ32(hdr->taglen);
1882
1883 READ_BUF(hdr->taglen + 4);
1884 hdr->tag = (char *)p;
1885 p += XDR_QUADLEN(hdr->taglen);
1886 READ32(hdr->nops);
1887 return 0;
1888}
1889
1890static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
1891{
1892 uint32_t *p;
1893 uint32_t opnum;
1894 int32_t nfserr;
1895
1896 READ_BUF(8);
1897 READ32(opnum);
1898 if (opnum != expected) {
1899 printk(KERN_NOTICE
1900 "nfs4_decode_op_hdr: Server returned operation"
1901 " %d but we issued a request for %d\n",
1902 opnum, expected);
1903 return -EIO;
1904 }
1905 READ32(nfserr);
1906 if (nfserr != NFS_OK)
1907 return -nfs_stat_to_errno(nfserr);
1908 return 0;
1909}
1910
1911/* Dummy routine */
1912static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
1913{
1914 uint32_t *p;
1915 uint32_t strlen;
1916 char *str;
1917
1918 READ_BUF(12);
1919 return decode_opaque_inline(xdr, &strlen, &str);
1920}
1921
1922static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
1923{
1924 uint32_t bmlen, *p;
1925
1926 READ_BUF(4);
1927 READ32(bmlen);
1928
1929 bitmap[0] = bitmap[1] = 0;
1930 READ_BUF((bmlen << 2));
1931 if (bmlen > 0) {
1932 READ32(bitmap[0]);
1933 if (bmlen > 1)
1934 READ32(bitmap[1]);
1935 }
1936 return 0;
1937}
1938
1939static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, uint32_t **savep)
1940{
1941 uint32_t *p;
1942
1943 READ_BUF(4);
1944 READ32(*attrlen);
1945 *savep = xdr->p;
1946 return 0;
1947}
1948
1949static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
1950{
1951 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
1952 decode_attr_bitmap(xdr, bitmask);
1953 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
1954 } else
1955 bitmask[0] = bitmask[1] = 0;
1956 dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]);
1957 return 0;
1958}
1959
1960static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
1961{
1962 uint32_t *p;
1963
1964 *type = 0;
1965 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
1966 return -EIO;
1967 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
1968 READ_BUF(4);
1969 READ32(*type);
1970 if (*type < NF4REG || *type > NF4NAMEDATTR) {
1971 dprintk("%s: bad type %d\n", __FUNCTION__, *type);
1972 return -EIO;
1973 }
1974 bitmap[0] &= ~FATTR4_WORD0_TYPE;
1975 }
1976 dprintk("%s: type=0%o\n", __FUNCTION__, nfs_type2fmt[*type].nfs2type);
1977 return 0;
1978}
1979
1980static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
1981{
1982 uint32_t *p;
1983
1984 *change = 0;
1985 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
1986 return -EIO;
1987 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
1988 READ_BUF(8);
1989 READ64(*change);
1990 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
1991 }
1992 dprintk("%s: change attribute=%Lu\n", __FUNCTION__,
1993 (unsigned long long)*change);
1994 return 0;
1995}
1996
1997static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
1998{
1999 uint32_t *p;
2000
2001 *size = 0;
2002 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
2003 return -EIO;
2004 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
2005 READ_BUF(8);
2006 READ64(*size);
2007 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2008 }
2009 dprintk("%s: file size=%Lu\n", __FUNCTION__, (unsigned long long)*size);
2010 return 0;
2011}
2012
2013static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2014{
2015 uint32_t *p;
2016
2017 *res = 0;
2018 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
2019 return -EIO;
2020 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
2021 READ_BUF(4);
2022 READ32(*res);
2023 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
2024 }
2025 dprintk("%s: link support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
2026 return 0;
2027}
2028
2029static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2030{
2031 uint32_t *p;
2032
2033 *res = 0;
2034 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
2035 return -EIO;
2036 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
2037 READ_BUF(4);
2038 READ32(*res);
2039 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
2040 }
2041 dprintk("%s: symlink support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
2042 return 0;
2043}
2044
2045static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid)
2046{
2047 uint32_t *p;
2048
2049 fsid->major = 0;
2050 fsid->minor = 0;
2051 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
2052 return -EIO;
2053 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
2054 READ_BUF(16);
2055 READ64(fsid->major);
2056 READ64(fsid->minor);
2057 bitmap[0] &= ~FATTR4_WORD0_FSID;
2058 }
2059 dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __FUNCTION__,
2060 (unsigned long long)fsid->major,
2061 (unsigned long long)fsid->minor);
2062 return 0;
2063}
2064
2065static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2066{
2067 uint32_t *p;
2068
2069 *res = 60;
2070 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
2071 return -EIO;
2072 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
2073 READ_BUF(4);
2074 READ32(*res);
2075 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
2076 }
2077 dprintk("%s: file size=%u\n", __FUNCTION__, (unsigned int)*res);
2078 return 0;
2079}
2080
2081static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2082{
2083 uint32_t *p;
2084
2085 *res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
2086 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
2087 return -EIO;
2088 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
2089 READ_BUF(4);
2090 READ32(*res);
2091 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
2092 }
2093 dprintk("%s: ACLs supported=%u\n", __FUNCTION__, (unsigned int)*res);
2094 return 0;
2095}
2096
2097static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2098{
2099 uint32_t *p;
2100
2101 *fileid = 0;
2102 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
2103 return -EIO;
2104 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
2105 READ_BUF(8);
2106 READ64(*fileid);
2107 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2108 }
2109 dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid);
2110 return 0;
2111}
2112
2113static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2114{
2115 uint32_t *p;
2116 int status = 0;
2117
2118 *res = 0;
2119 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
2120 return -EIO;
2121 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
2122 READ_BUF(8);
2123 READ64(*res);
2124 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
2125 }
2126 dprintk("%s: files avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2127 return status;
2128}
2129
2130static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2131{
2132 uint32_t *p;
2133 int status = 0;
2134
2135 *res = 0;
2136 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
2137 return -EIO;
2138 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
2139 READ_BUF(8);
2140 READ64(*res);
2141 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
2142 }
2143 dprintk("%s: files free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2144 return status;
2145}
2146
2147static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2148{
2149 uint32_t *p;
2150 int status = 0;
2151
2152 *res = 0;
2153 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
2154 return -EIO;
2155 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
2156 READ_BUF(8);
2157 READ64(*res);
2158 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
2159 }
2160 dprintk("%s: files total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2161 return status;
2162}
2163
2164static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2165{
2166 uint32_t *p;
2167 int status = 0;
2168
2169 *res = 0;
2170 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
2171 return -EIO;
2172 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
2173 READ_BUF(8);
2174 READ64(*res);
2175 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
2176 }
2177 dprintk("%s: maxfilesize=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2178 return status;
2179}
2180
2181static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
2182{
2183 uint32_t *p;
2184 int status = 0;
2185
2186 *maxlink = 1;
2187 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
2188 return -EIO;
2189 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
2190 READ_BUF(4);
2191 READ32(*maxlink);
2192 bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
2193 }
2194 dprintk("%s: maxlink=%u\n", __FUNCTION__, *maxlink);
2195 return status;
2196}
2197
2198static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
2199{
2200 uint32_t *p;
2201 int status = 0;
2202
2203 *maxname = 1024;
2204 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
2205 return -EIO;
2206 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
2207 READ_BUF(4);
2208 READ32(*maxname);
2209 bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
2210 }
2211 dprintk("%s: maxname=%u\n", __FUNCTION__, *maxname);
2212 return status;
2213}
2214
2215static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2216{
2217 uint32_t *p;
2218 int status = 0;
2219
2220 *res = 1024;
2221 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXREAD - 1U)))
2222 return -EIO;
2223 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
2224 uint64_t maxread;
2225 READ_BUF(8);
2226 READ64(maxread);
2227 if (maxread > 0x7FFFFFFF)
2228 maxread = 0x7FFFFFFF;
2229 *res = (uint32_t)maxread;
2230 bitmap[0] &= ~FATTR4_WORD0_MAXREAD;
2231 }
2232 dprintk("%s: maxread=%lu\n", __FUNCTION__, (unsigned long)*res);
2233 return status;
2234}
2235
2236static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2237{
2238 uint32_t *p;
2239 int status = 0;
2240
2241 *res = 1024;
2242 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXWRITE - 1U)))
2243 return -EIO;
2244 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
2245 uint64_t maxwrite;
2246 READ_BUF(8);
2247 READ64(maxwrite);
2248 if (maxwrite > 0x7FFFFFFF)
2249 maxwrite = 0x7FFFFFFF;
2250 *res = (uint32_t)maxwrite;
2251 bitmap[0] &= ~FATTR4_WORD0_MAXWRITE;
2252 }
2253 dprintk("%s: maxwrite=%lu\n", __FUNCTION__, (unsigned long)*res);
2254 return status;
2255}
2256
2257static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
2258{
2259 uint32_t *p;
2260
2261 *mode = 0;
2262 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
2263 return -EIO;
2264 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
2265 READ_BUF(4);
2266 READ32(*mode);
2267 *mode &= ~S_IFMT;
2268 bitmap[1] &= ~FATTR4_WORD1_MODE;
2269 }
2270 dprintk("%s: file mode=0%o\n", __FUNCTION__, (unsigned int)*mode);
2271 return 0;
2272}
2273
2274static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
2275{
2276 uint32_t *p;
2277
2278 *nlink = 1;
2279 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
2280 return -EIO;
2281 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
2282 READ_BUF(4);
2283 READ32(*nlink);
2284 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
2285 }
2286 dprintk("%s: nlink=%u\n", __FUNCTION__, (unsigned int)*nlink);
2287 return 0;
2288}
2289
2290static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
2291{
2292 uint32_t len, *p;
2293
2294 *uid = -2;
2295 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
2296 return -EIO;
2297 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
2298 READ_BUF(4);
2299 READ32(len);
2300 READ_BUF(len);
2301 if (len < XDR_MAX_NETOBJ) {
2302 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0)
2303 dprintk("%s: nfs_map_name_to_uid failed!\n",
2304 __FUNCTION__);
2305 } else
2306 printk(KERN_WARNING "%s: name too long (%u)!\n",
2307 __FUNCTION__, len);
2308 bitmap[1] &= ~FATTR4_WORD1_OWNER;
2309 }
2310 dprintk("%s: uid=%d\n", __FUNCTION__, (int)*uid);
2311 return 0;
2312}
2313
2314static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
2315{
2316 uint32_t len, *p;
2317
2318 *gid = -2;
2319 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
2320 return -EIO;
2321 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
2322 READ_BUF(4);
2323 READ32(len);
2324 READ_BUF(len);
2325 if (len < XDR_MAX_NETOBJ) {
2326 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0)
2327 dprintk("%s: nfs_map_group_to_gid failed!\n",
2328 __FUNCTION__);
2329 } else
2330 printk(KERN_WARNING "%s: name too long (%u)!\n",
2331 __FUNCTION__, len);
2332 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
2333 }
2334 dprintk("%s: gid=%d\n", __FUNCTION__, (int)*gid);
2335 return 0;
2336}
2337
2338static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
2339{
2340 uint32_t major = 0, minor = 0, *p;
2341
2342 *rdev = MKDEV(0,0);
2343 if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
2344 return -EIO;
2345 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
2346 dev_t tmp;
2347
2348 READ_BUF(8);
2349 READ32(major);
2350 READ32(minor);
2351 tmp = MKDEV(major, minor);
2352 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
2353 *rdev = tmp;
2354 bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
2355 }
2356 dprintk("%s: rdev=(0x%x:0x%x)\n", __FUNCTION__, major, minor);
2357 return 0;
2358}
2359
2360static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2361{
2362 uint32_t *p;
2363 int status = 0;
2364
2365 *res = 0;
2366 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
2367 return -EIO;
2368 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
2369 READ_BUF(8);
2370 READ64(*res);
2371 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
2372 }
2373 dprintk("%s: space avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2374 return status;
2375}
2376
2377static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2378{
2379 uint32_t *p;
2380 int status = 0;
2381
2382 *res = 0;
2383 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
2384 return -EIO;
2385 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
2386 READ_BUF(8);
2387 READ64(*res);
2388 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
2389 }
2390 dprintk("%s: space free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2391 return status;
2392}
2393
2394static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2395{
2396 uint32_t *p;
2397 int status = 0;
2398
2399 *res = 0;
2400 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
2401 return -EIO;
2402 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
2403 READ_BUF(8);
2404 READ64(*res);
2405 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
2406 }
2407 dprintk("%s: space total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
2408 return status;
2409}
2410
2411static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
2412{
2413 uint32_t *p;
2414
2415 *used = 0;
2416 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
2417 return -EIO;
2418 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
2419 READ_BUF(8);
2420 READ64(*used);
2421 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
2422 }
2423 dprintk("%s: space used=%Lu\n", __FUNCTION__,
2424 (unsigned long long)*used);
2425 return 0;
2426}
2427
2428static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
2429{
2430 uint32_t *p;
2431 uint64_t sec;
2432 uint32_t nsec;
2433
2434 READ_BUF(12);
2435 READ64(sec);
2436 READ32(nsec);
2437 time->tv_sec = (time_t)sec;
2438 time->tv_nsec = (long)nsec;
2439 return 0;
2440}
2441
2442static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
2443{
2444 int status = 0;
2445
2446 time->tv_sec = 0;
2447 time->tv_nsec = 0;
2448 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_ACCESS - 1U)))
2449 return -EIO;
2450 if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
2451 status = decode_attr_time(xdr, time);
2452 bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
2453 }
2454 dprintk("%s: atime=%ld\n", __FUNCTION__, (long)time->tv_sec);
2455 return status;
2456}
2457
2458static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
2459{
2460 int status = 0;
2461
2462 time->tv_sec = 0;
2463 time->tv_nsec = 0;
2464 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_METADATA - 1U)))
2465 return -EIO;
2466 if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
2467 status = decode_attr_time(xdr, time);
2468 bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
2469 }
2470 dprintk("%s: ctime=%ld\n", __FUNCTION__, (long)time->tv_sec);
2471 return status;
2472}
2473
2474static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
2475{
2476 int status = 0;
2477
2478 time->tv_sec = 0;
2479 time->tv_nsec = 0;
2480 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_MODIFY - 1U)))
2481 return -EIO;
2482 if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
2483 status = decode_attr_time(xdr, time);
2484 bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
2485 }
2486 dprintk("%s: mtime=%ld\n", __FUNCTION__, (long)time->tv_sec);
2487 return status;
2488}
2489
2490static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen)
2491{
2492 unsigned int attrwords = XDR_QUADLEN(attrlen);
2493 unsigned int nwords = xdr->p - savep;
2494
2495 if (unlikely(attrwords != nwords)) {
2496 printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
2497 __FUNCTION__,
2498 attrwords << 2,
2499 (attrwords < nwords) ? '<' : '>',
2500 nwords << 2);
2501 return -EIO;
2502 }
2503 return 0;
2504}
2505
2506static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
2507{
2508 uint32_t *p;
2509
2510 READ_BUF(20);
2511 READ32(cinfo->atomic);
2512 READ64(cinfo->before);
2513 READ64(cinfo->after);
2514 return 0;
2515}
2516
2517static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
2518{
2519 uint32_t *p;
2520 uint32_t supp, acc;
2521 int status;
2522
2523 status = decode_op_hdr(xdr, OP_ACCESS);
2524 if (status)
2525 return status;
2526 READ_BUF(8);
2527 READ32(supp);
2528 READ32(acc);
2529 access->supported = supp;
2530 access->access = acc;
2531 return 0;
2532}
2533
2534static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
2535{
2536 uint32_t *p;
2537 int status;
2538
2539 status = decode_op_hdr(xdr, OP_CLOSE);
2540 if (status)
2541 return status;
2542 READ_BUF(sizeof(res->stateid.data));
2543 COPYMEM(res->stateid.data, sizeof(res->stateid.data));
2544 return 0;
2545}
2546
2547static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
2548{
2549 uint32_t *p;
2550 int status;
2551
2552 status = decode_op_hdr(xdr, OP_COMMIT);
2553 if (status)
2554 return status;
2555 READ_BUF(8);
2556 COPYMEM(res->verf->verifier, 8);
2557 return 0;
2558}
2559
2560static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
2561{
2562 uint32_t *p;
2563 uint32_t bmlen;
2564 int status;
2565
2566 status = decode_op_hdr(xdr, OP_CREATE);
2567 if (status)
2568 return status;
2569 if ((status = decode_change_info(xdr, cinfo)))
2570 return status;
2571 READ_BUF(4);
2572 READ32(bmlen);
2573 READ_BUF(bmlen << 2);
2574 return 0;
2575}
2576
2577static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
2578{
2579 uint32_t *savep;
2580 uint32_t attrlen,
2581 bitmap[2] = {0};
2582 int status;
2583
2584 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2585 goto xdr_error;
2586 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
2587 goto xdr_error;
2588 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
2589 goto xdr_error;
2590 if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
2591 goto xdr_error;
2592 if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
2593 goto xdr_error;
2594 if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
2595 goto xdr_error;
2596 if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0)
2597 goto xdr_error;
2598 status = verify_attr_len(xdr, savep, attrlen);
2599xdr_error:
2600 if (status != 0)
2601 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2602 return status;
2603}
2604
2605static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
2606{
2607 uint32_t *savep;
2608 uint32_t attrlen,
2609 bitmap[2] = {0};
2610 int status;
2611
2612 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2613 goto xdr_error;
2614 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
2615 goto xdr_error;
2616 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
2617 goto xdr_error;
2618
2619 if ((status = decode_attr_files_avail(xdr, bitmap, &fsstat->afiles)) != 0)
2620 goto xdr_error;
2621 if ((status = decode_attr_files_free(xdr, bitmap, &fsstat->ffiles)) != 0)
2622 goto xdr_error;
2623 if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0)
2624 goto xdr_error;
2625 if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0)
2626 goto xdr_error;
2627 if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0)
2628 goto xdr_error;
2629 if ((status = decode_attr_space_total(xdr, bitmap, &fsstat->tbytes)) != 0)
2630 goto xdr_error;
2631
2632 status = verify_attr_len(xdr, savep, attrlen);
2633xdr_error:
2634 if (status != 0)
2635 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2636 return status;
2637}
2638
2639static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
2640{
2641 uint32_t *savep;
2642 uint32_t attrlen,
2643 bitmap[2] = {0};
2644 int status;
2645
2646 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2647 goto xdr_error;
2648 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
2649 goto xdr_error;
2650 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
2651 goto xdr_error;
2652
2653 if ((status = decode_attr_maxlink(xdr, bitmap, &pathconf->max_link)) != 0)
2654 goto xdr_error;
2655 if ((status = decode_attr_maxname(xdr, bitmap, &pathconf->max_namelen)) != 0)
2656 goto xdr_error;
2657
2658 status = verify_attr_len(xdr, savep, attrlen);
2659xdr_error:
2660 if (status != 0)
2661 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2662 return status;
2663}
2664
2665static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server)
2666{
2667 uint32_t *savep;
2668 uint32_t attrlen,
2669 bitmap[2] = {0},
2670 type;
2671 int status, fmode = 0;
2672
2673 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2674 goto xdr_error;
2675 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
2676 goto xdr_error;
2677
2678 fattr->bitmap[0] = bitmap[0];
2679 fattr->bitmap[1] = bitmap[1];
2680
2681 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
2682 goto xdr_error;
2683
2684
2685 if ((status = decode_attr_type(xdr, bitmap, &type)) != 0)
2686 goto xdr_error;
2687 fattr->type = nfs_type2fmt[type].nfs2type;
2688 fmode = nfs_type2fmt[type].mode;
2689
2690 if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0)
2691 goto xdr_error;
2692 if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
2693 goto xdr_error;
2694 if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0)
2695 goto xdr_error;
2696 if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
2697 goto xdr_error;
2698 if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
2699 goto xdr_error;
2700 fattr->mode |= fmode;
2701 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
2702 goto xdr_error;
2703 if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
2704 goto xdr_error;
2705 if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
2706 goto xdr_error;
2707 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
2708 goto xdr_error;
2709 if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0)
2710 goto xdr_error;
2711 if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0)
2712 goto xdr_error;
2713 if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0)
2714 goto xdr_error;
2715 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
2716 goto xdr_error;
2717 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) {
2718 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
2719 fattr->timestamp = jiffies;
2720 }
2721xdr_error:
2722 if (status != 0)
2723 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2724 return status;
2725}
2726
2727
2728static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
2729{
2730 uint32_t *savep;
2731 uint32_t attrlen, bitmap[2];
2732 int status;
2733
2734 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2735 goto xdr_error;
2736 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
2737 goto xdr_error;
2738 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
2739 goto xdr_error;
2740
2741 fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? */
2742
2743 if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
2744 goto xdr_error;
2745 if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
2746 goto xdr_error;
2747 if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
2748 goto xdr_error;
2749 fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;
2750 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
2751 goto xdr_error;
2752 fsinfo->wtpref = fsinfo->wtmax;
2753
2754 status = verify_attr_len(xdr, savep, attrlen);
2755xdr_error:
2756 if (status != 0)
2757 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2758 return status;
2759}
2760
2761static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
2762{
2763 uint32_t *p;
2764 uint32_t len;
2765 int status;
2766
2767 status = decode_op_hdr(xdr, OP_GETFH);
2768 if (status)
2769 return status;
2770 /* Zero handle first to allow comparisons */
2771 memset(fh, 0, sizeof(*fh));
2772
2773 READ_BUF(4);
2774 READ32(len);
2775 if (len > NFS4_FHSIZE)
2776 return -EIO;
2777 fh->size = len;
2778 READ_BUF(len);
2779 COPYMEM(fh->data, len);
2780 return 0;
2781}
2782
2783static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
2784{
2785 int status;
2786
2787 status = decode_op_hdr(xdr, OP_LINK);
2788 if (status)
2789 return status;
2790 return decode_change_info(xdr, cinfo);
2791}
2792
2793/*
2794 * We create the owner, so we know a proper owner.id length is 4.
2795 */
2796static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied)
2797{
2798 uint32_t *p;
2799 uint32_t namelen;
2800
2801 READ_BUF(32);
2802 READ64(denied->offset);
2803 READ64(denied->length);
2804 READ32(denied->type);
2805 READ64(denied->owner.clientid);
2806 READ32(namelen);
2807 READ_BUF(namelen);
2808 if (namelen == 4)
2809 READ32(denied->owner.id);
2810 return -NFS4ERR_DENIED;
2811}
2812
2813static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
2814{
2815 uint32_t *p;
2816 int status;
2817
2818 status = decode_op_hdr(xdr, OP_LOCK);
2819 if (status == 0) {
2820 READ_BUF(sizeof(nfs4_stateid));
2821 COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
2822 } else if (status == -NFS4ERR_DENIED)
2823 return decode_lock_denied(xdr, &res->u.denied);
2824 return status;
2825}
2826
2827static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res)
2828{
2829 int status;
2830 status = decode_op_hdr(xdr, OP_LOCKT);
2831 if (status == -NFS4ERR_DENIED)
2832 return decode_lock_denied(xdr, &res->u.denied);
2833 return status;
2834}
2835
2836static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
2837{
2838 uint32_t *p;
2839 int status;
2840
2841 status = decode_op_hdr(xdr, OP_LOCKU);
2842 if (status == 0) {
2843 READ_BUF(sizeof(nfs4_stateid));
2844 COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
2845 }
2846 return status;
2847}
2848
2849static int decode_lookup(struct xdr_stream *xdr)
2850{
2851 return decode_op_hdr(xdr, OP_LOOKUP);
2852}
2853
2854/* This is too sick! */
2855static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
2856{
2857 uint32_t *p;
2858 uint32_t limit_type, nblocks, blocksize;
2859
2860 READ_BUF(12);
2861 READ32(limit_type);
2862 switch (limit_type) {
2863 case 1:
2864 READ64(*maxsize);
2865 break;
2866 case 2:
2867 READ32(nblocks);
2868 READ32(blocksize);
2869 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
2870 }
2871 return 0;
2872}
2873
2874static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
2875{
2876 uint32_t *p;
2877 uint32_t delegation_type;
2878
2879 READ_BUF(4);
2880 READ32(delegation_type);
2881 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
2882 res->delegation_type = 0;
2883 return 0;
2884 }
2885 READ_BUF(20);
2886 COPYMEM(res->delegation.data, sizeof(res->delegation.data));
2887 READ32(res->do_recall);
2888 switch (delegation_type) {
2889 case NFS4_OPEN_DELEGATE_READ:
2890 res->delegation_type = FMODE_READ;
2891 break;
2892 case NFS4_OPEN_DELEGATE_WRITE:
2893 res->delegation_type = FMODE_WRITE|FMODE_READ;
2894 if (decode_space_limit(xdr, &res->maxsize) < 0)
2895 return -EIO;
2896 }
2897 return decode_ace(xdr, NULL, res->server->nfs4_state);
2898}
2899
2900static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
2901{
2902 uint32_t *p;
2903 uint32_t bmlen;
2904 int status;
2905
2906 status = decode_op_hdr(xdr, OP_OPEN);
2907 if (status)
2908 return status;
2909 READ_BUF(sizeof(res->stateid.data));
2910 COPYMEM(res->stateid.data, sizeof(res->stateid.data));
2911
2912 decode_change_info(xdr, &res->cinfo);
2913
2914 READ_BUF(8);
2915 READ32(res->rflags);
2916 READ32(bmlen);
2917 if (bmlen > 10)
2918 goto xdr_error;
2919
2920 READ_BUF(bmlen << 2);
2921 p += bmlen;
2922 return decode_delegation(xdr, res);
2923xdr_error:
2924 printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__);
2925 return -EIO;
2926}
2927
2928static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
2929{
2930 uint32_t *p;
2931 int status;
2932
2933 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
2934 if (status)
2935 return status;
2936 READ_BUF(sizeof(res->stateid.data));
2937 COPYMEM(res->stateid.data, sizeof(res->stateid.data));
2938 return 0;
2939}
2940
2941static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
2942{
2943 uint32_t *p;
2944 int status;
2945
2946 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
2947 if (status)
2948 return status;
2949 READ_BUF(sizeof(res->stateid.data));
2950 COPYMEM(res->stateid.data, sizeof(res->stateid.data));
2951 return 0;
2952}
2953
2954static int decode_putfh(struct xdr_stream *xdr)
2955{
2956 return decode_op_hdr(xdr, OP_PUTFH);
2957}
2958
2959static int decode_putrootfh(struct xdr_stream *xdr)
2960{
2961 return decode_op_hdr(xdr, OP_PUTROOTFH);
2962}
2963
2964static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
2965{
2966 struct kvec *iov = req->rq_rcv_buf.head;
2967 uint32_t *p;
2968 uint32_t count, eof, recvd, hdrlen;
2969 int status;
2970
2971 status = decode_op_hdr(xdr, OP_READ);
2972 if (status)
2973 return status;
2974 READ_BUF(8);
2975 READ32(eof);
2976 READ32(count);
2977 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
2978 recvd = req->rq_rcv_buf.len - hdrlen;
2979 if (count > recvd) {
2980 printk(KERN_WARNING "NFS: server cheating in read reply: "
2981 "count %u > recvd %u\n", count, recvd);
2982 count = recvd;
2983 eof = 0;
2984 }
2985 xdr_read_pages(xdr, count);
2986 res->eof = eof;
2987 res->count = count;
2988 return 0;
2989}
2990
2991static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
2992{
2993 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
2994 struct page *page = *rcvbuf->pages;
2995 struct kvec *iov = rcvbuf->head;
2996 unsigned int nr, pglen = rcvbuf->page_len;
2997 uint32_t *end, *entry, *p, *kaddr;
2998 uint32_t len, attrlen;
2999 int hdrlen, recvd, status;
3000
3001 status = decode_op_hdr(xdr, OP_READDIR);
3002 if (status)
3003 return status;
3004 READ_BUF(8);
3005 COPYMEM(readdir->verifier.data, 8);
3006
3007 hdrlen = (char *) p - (char *) iov->iov_base;
3008 recvd = rcvbuf->len - hdrlen;
3009 if (pglen > recvd)
3010 pglen = recvd;
3011 xdr_read_pages(xdr, pglen);
3012
3013 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
3014 kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
3015 end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
3016 entry = p;
3017 for (nr = 0; *p++; nr++) {
3018 if (p + 3 > end)
3019 goto short_pkt;
3020 p += 2; /* cookie */
3021 len = ntohl(*p++); /* filename length */
3022 if (len > NFS4_MAXNAMLEN) {
3023 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
3024 goto err_unmap;
3025 }
3026 p += XDR_QUADLEN(len);
3027 if (p + 1 > end)
3028 goto short_pkt;
3029 len = ntohl(*p++); /* bitmap length */
3030 p += len;
3031 if (p + 1 > end)
3032 goto short_pkt;
3033 attrlen = XDR_QUADLEN(ntohl(*p++));
3034 p += attrlen; /* attributes */
3035 if (p + 2 > end)
3036 goto short_pkt;
3037 entry = p;
3038 }
3039 if (!nr && (entry[0] != 0 || entry[1] == 0))
3040 goto short_pkt;
3041out:
3042 kunmap_atomic(kaddr, KM_USER0);
3043 return 0;
3044short_pkt:
3045 entry[0] = entry[1] = 0;
3046 /* truncate listing ? */
3047 if (!nr) {
3048 printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
3049 entry[1] = 1;
3050 }
3051 goto out;
3052err_unmap:
3053 kunmap_atomic(kaddr, KM_USER0);
3054 return -errno_NFSERR_IO;
3055}
3056
3057static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3058{
3059 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
3060 struct kvec *iov = rcvbuf->head;
3061 int hdrlen, len, recvd;
3062 uint32_t *p;
3063 char *kaddr;
3064 int status;
3065
3066 status = decode_op_hdr(xdr, OP_READLINK);
3067 if (status)
3068 return status;
3069
3070 /* Convert length of symlink */
3071 READ_BUF(4);
3072 READ32(len);
3073 if (len >= rcvbuf->page_len || len <= 0) {
3074 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
3075 return -ENAMETOOLONG;
3076 }
3077 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3078 recvd = req->rq_rcv_buf.len - hdrlen;
3079 if (recvd < len) {
3080 printk(KERN_WARNING "NFS: server cheating in readlink reply: "
3081 "count %u > recvd %u\n", len, recvd);
3082 return -EIO;
3083 }
3084 xdr_read_pages(xdr, len);
3085 /*
3086 * The XDR encode routine has set things up so that
3087 * the link text will be copied directly into the
3088 * buffer. We just have to do overflow-checking,
3089 * and and null-terminate the text (the VFS expects
3090 * null-termination).
3091 */
3092 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
3093 kaddr[len+rcvbuf->page_base] = '\0';
3094 kunmap_atomic(kaddr, KM_USER0);
3095 return 0;
3096}
3097
3098static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3099{
3100 int status;
3101
3102 status = decode_op_hdr(xdr, OP_REMOVE);
3103 if (status)
3104 goto out;
3105 status = decode_change_info(xdr, cinfo);
3106out:
3107 return status;
3108}
3109
3110static int decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo,
3111 struct nfs4_change_info *new_cinfo)
3112{
3113 int status;
3114
3115 status = decode_op_hdr(xdr, OP_RENAME);
3116 if (status)
3117 goto out;
3118 if ((status = decode_change_info(xdr, old_cinfo)))
3119 goto out;
3120 status = decode_change_info(xdr, new_cinfo);
3121out:
3122 return status;
3123}
3124
3125static int decode_renew(struct xdr_stream *xdr)
3126{
3127 return decode_op_hdr(xdr, OP_RENEW);
3128}
3129
3130static int
3131decode_savefh(struct xdr_stream *xdr)
3132{
3133 return decode_op_hdr(xdr, OP_SAVEFH);
3134}
3135
3136static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
3137{
3138 uint32_t *p;
3139 uint32_t bmlen;
3140 int status;
3141
3142
3143 status = decode_op_hdr(xdr, OP_SETATTR);
3144 if (status)
3145 return status;
3146 READ_BUF(4);
3147 READ32(bmlen);
3148 READ_BUF(bmlen << 2);
3149 return 0;
3150}
3151
3152static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
3153{
3154 uint32_t *p;
3155 uint32_t opnum;
3156 int32_t nfserr;
3157
3158 READ_BUF(8);
3159 READ32(opnum);
3160 if (opnum != OP_SETCLIENTID) {
3161 printk(KERN_NOTICE
3162 "nfs4_decode_setclientid: Server returned operation"
3163 " %d\n", opnum);
3164 return -EIO;
3165 }
3166 READ32(nfserr);
3167 if (nfserr == NFS_OK) {
3168 READ_BUF(8 + sizeof(clp->cl_confirm.data));
3169 READ64(clp->cl_clientid);
3170 COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data));
3171 } else if (nfserr == NFSERR_CLID_INUSE) {
3172 uint32_t len;
3173
3174 /* skip netid string */
3175 READ_BUF(4);
3176 READ32(len);
3177 READ_BUF(len);
3178
3179 /* skip uaddr string */
3180 READ_BUF(4);
3181 READ32(len);
3182 READ_BUF(len);
3183 return -NFSERR_CLID_INUSE;
3184 } else
3185 return -nfs_stat_to_errno(nfserr);
3186
3187 return 0;
3188}
3189
3190static int decode_setclientid_confirm(struct xdr_stream *xdr)
3191{
3192 return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
3193}
3194
3195static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
3196{
3197 uint32_t *p;
3198 int status;
3199
3200 status = decode_op_hdr(xdr, OP_WRITE);
3201 if (status)
3202 return status;
3203
3204 READ_BUF(16);
3205 READ32(res->count);
3206 READ32(res->verf->committed);
3207 COPYMEM(res->verf->verifier, 8);
3208 return 0;
3209}
3210
3211static int decode_delegreturn(struct xdr_stream *xdr)
3212{
3213 return decode_op_hdr(xdr, OP_DELEGRETURN);
3214}
3215
3216/*
3217 * Decode OPEN_DOWNGRADE response
3218 */
3219static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
3220{
3221 struct xdr_stream xdr;
3222 struct compound_hdr hdr;
3223 int status;
3224
3225 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3226 status = decode_compound_hdr(&xdr, &hdr);
3227 if (status)
3228 goto out;
3229 status = decode_putfh(&xdr);
3230 if (status)
3231 goto out;
3232 status = decode_open_downgrade(&xdr, res);
3233out:
3234 return status;
3235}
3236
3237/*
3238 * END OF "GENERIC" DECODE ROUTINES.
3239 */
3240
3241/*
3242 * Decode ACCESS response
3243 */
3244static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res)
3245{
3246 struct xdr_stream xdr;
3247 struct compound_hdr hdr;
3248 int status;
3249
3250 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3251 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3252 goto out;
3253 if ((status = decode_putfh(&xdr)) == 0)
3254 status = decode_access(&xdr, res);
3255out:
3256 return status;
3257}
3258
3259/*
3260 * Decode LOOKUP response
3261 */
3262static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
3263{
3264 struct xdr_stream xdr;
3265 struct compound_hdr hdr;
3266 int status;
3267
3268 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3269 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3270 goto out;
3271 if ((status = decode_putfh(&xdr)) != 0)
3272 goto out;
3273 if ((status = decode_lookup(&xdr)) != 0)
3274 goto out;
3275 if ((status = decode_getfh(&xdr, res->fh)) != 0)
3276 goto out;
3277 status = decode_getfattr(&xdr, res->fattr, res->server);
3278out:
3279 return status;
3280}
3281
3282/*
3283 * Decode LOOKUP_ROOT response
3284 */
3285static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
3286{
3287 struct xdr_stream xdr;
3288 struct compound_hdr hdr;
3289 int status;
3290
3291 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3292 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3293 goto out;
3294 if ((status = decode_putrootfh(&xdr)) != 0)
3295 goto out;
3296 if ((status = decode_getfh(&xdr, res->fh)) == 0)
3297 status = decode_getfattr(&xdr, res->fattr, res->server);
3298out:
3299 return status;
3300}
3301
3302/*
3303 * Decode REMOVE response
3304 */
3305static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
3306{
3307 struct xdr_stream xdr;
3308 struct compound_hdr hdr;
3309 int status;
3310
3311 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3312 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3313 goto out;
3314 if ((status = decode_putfh(&xdr)) == 0)
3315 status = decode_remove(&xdr, cinfo);
3316out:
3317 return status;
3318}
3319
3320/*
3321 * Decode RENAME response
3322 */
3323static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res)
3324{
3325 struct xdr_stream xdr;
3326 struct compound_hdr hdr;
3327 int status;
3328
3329 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3330 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3331 goto out;
3332 if ((status = decode_putfh(&xdr)) != 0)
3333 goto out;
3334 if ((status = decode_savefh(&xdr)) != 0)
3335 goto out;
3336 if ((status = decode_putfh(&xdr)) != 0)
3337 goto out;
3338 status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo);
3339out:
3340 return status;
3341}
3342
3343/*
3344 * Decode LINK response
3345 */
3346static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
3347{
3348 struct xdr_stream xdr;
3349 struct compound_hdr hdr;
3350 int status;
3351
3352 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3353 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3354 goto out;
3355 if ((status = decode_putfh(&xdr)) != 0)
3356 goto out;
3357 if ((status = decode_savefh(&xdr)) != 0)
3358 goto out;
3359 if ((status = decode_putfh(&xdr)) != 0)
3360 goto out;
3361 status = decode_link(&xdr, cinfo);
3362out:
3363 return status;
3364}
3365
3366/*
3367 * Decode CREATE response
3368 */
3369static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
3370{
3371 struct xdr_stream xdr;
3372 struct compound_hdr hdr;
3373 int status;
3374
3375 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3376 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3377 goto out;
3378 if ((status = decode_putfh(&xdr)) != 0)
3379 goto out;
3380 if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
3381 goto out;
3382 if ((status = decode_getfh(&xdr, res->fh)) != 0)
3383 goto out;
3384 status = decode_getfattr(&xdr, res->fattr, res->server);
3385 if (status == NFS4ERR_DELAY)
3386 status = 0;
3387out:
3388 return status;
3389}
3390
3391/*
3392 * Decode SYMLINK response
3393 */
3394static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
3395{
3396 return nfs4_xdr_dec_create(rqstp, p, res);
3397}
3398
3399/*
3400 * Decode GETATTR response
3401 */
3402static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res)
3403{
3404 struct xdr_stream xdr;
3405 struct compound_hdr hdr;
3406 int status;
3407
3408 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3409 status = decode_compound_hdr(&xdr, &hdr);
3410 if (status)
3411 goto out;
3412 status = decode_putfh(&xdr);
3413 if (status)
3414 goto out;
3415 status = decode_getfattr(&xdr, res->fattr, res->server);
3416out:
3417 return status;
3418
3419}
3420
3421
3422/*
3423 * Decode CLOSE response
3424 */
3425static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
3426{
3427 struct xdr_stream xdr;
3428 struct compound_hdr hdr;
3429 int status;
3430
3431 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3432 status = decode_compound_hdr(&xdr, &hdr);
3433 if (status)
3434 goto out;
3435 status = decode_putfh(&xdr);
3436 if (status)
3437 goto out;
3438 status = decode_close(&xdr, res);
3439out:
3440 return status;
3441}
3442
3443/*
3444 * Decode OPEN response
3445 */
3446static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
3447{
3448 struct xdr_stream xdr;
3449 struct compound_hdr hdr;
3450 int status;
3451
3452 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3453 status = decode_compound_hdr(&xdr, &hdr);
3454 if (status)
3455 goto out;
3456 status = decode_putfh(&xdr);
3457 if (status)
3458 goto out;
3459 status = decode_open(&xdr, res);
3460 if (status)
3461 goto out;
3462 status = decode_getfh(&xdr, &res->fh);
3463 if (status)
3464 goto out;
3465 status = decode_getfattr(&xdr, res->f_attr, res->server);
3466 if (status == NFS4ERR_DELAY)
3467 status = 0;
3468out:
3469 return status;
3470}
3471
3472/*
3473 * Decode OPEN_CONFIRM response
3474 */
3475static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res)
3476{
3477 struct xdr_stream xdr;
3478 struct compound_hdr hdr;
3479 int status;
3480
3481 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3482 status = decode_compound_hdr(&xdr, &hdr);
3483 if (status)
3484 goto out;
3485 status = decode_putfh(&xdr);
3486 if (status)
3487 goto out;
3488 status = decode_open_confirm(&xdr, res);
3489out:
3490 return status;
3491}
3492
3493/*
3494 * Decode OPEN response
3495 */
3496static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
3497{
3498 struct xdr_stream xdr;
3499 struct compound_hdr hdr;
3500 int status;
3501
3502 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3503 status = decode_compound_hdr(&xdr, &hdr);
3504 if (status)
3505 goto out;
3506 status = decode_putfh(&xdr);
3507 if (status)
3508 goto out;
3509 status = decode_open(&xdr, res);
3510out:
3511 return status;
3512}
3513
3514/*
3515 * Decode SETATTR response
3516 */
3517static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_setattrres *res)
3518{
3519 struct xdr_stream xdr;
3520 struct compound_hdr hdr;
3521 int status;
3522
3523 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3524 status = decode_compound_hdr(&xdr, &hdr);
3525 if (status)
3526 goto out;
3527 status = decode_putfh(&xdr);
3528 if (status)
3529 goto out;
3530 status = decode_setattr(&xdr, res);
3531 if (status)
3532 goto out;
3533 status = decode_getfattr(&xdr, res->fattr, res->server);
3534 if (status == NFS4ERR_DELAY)
3535 status = 0;
3536out:
3537 return status;
3538}
3539
3540/*
3541 * Decode LOCK response
3542 */
3543static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
3544{
3545 struct xdr_stream xdr;
3546 struct compound_hdr hdr;
3547 int status;
3548
3549 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3550 status = decode_compound_hdr(&xdr, &hdr);
3551 if (status)
3552 goto out;
3553 status = decode_putfh(&xdr);
3554 if (status)
3555 goto out;
3556 status = decode_lock(&xdr, res);
3557out:
3558 return status;
3559}
3560
3561/*
3562 * Decode LOCKT response
3563 */
3564static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
3565{
3566 struct xdr_stream xdr;
3567 struct compound_hdr hdr;
3568 int status;
3569
3570 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3571 status = decode_compound_hdr(&xdr, &hdr);
3572 if (status)
3573 goto out;
3574 status = decode_putfh(&xdr);
3575 if (status)
3576 goto out;
3577 status = decode_lockt(&xdr, res);
3578out:
3579 return status;
3580}
3581
3582/*
3583 * Decode LOCKU response
3584 */
3585static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
3586{
3587 struct xdr_stream xdr;
3588 struct compound_hdr hdr;
3589 int status;
3590
3591 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3592 status = decode_compound_hdr(&xdr, &hdr);
3593 if (status)
3594 goto out;
3595 status = decode_putfh(&xdr);
3596 if (status)
3597 goto out;
3598 status = decode_locku(&xdr, res);
3599out:
3600 return status;
3601}
3602
3603/*
3604 * Decode READLINK response
3605 */
3606static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res)
3607{
3608 struct xdr_stream xdr;
3609 struct compound_hdr hdr;
3610 int status;
3611
3612 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3613 status = decode_compound_hdr(&xdr, &hdr);
3614 if (status)
3615 goto out;
3616 status = decode_putfh(&xdr);
3617 if (status)
3618 goto out;
3619 status = decode_readlink(&xdr, rqstp);
3620out:
3621 return status;
3622}
3623
3624/*
3625 * Decode READDIR response
3626 */
3627static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res)
3628{
3629 struct xdr_stream xdr;
3630 struct compound_hdr hdr;
3631 int status;
3632
3633 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3634 status = decode_compound_hdr(&xdr, &hdr);
3635 if (status)
3636 goto out;
3637 status = decode_putfh(&xdr);
3638 if (status)
3639 goto out;
3640 status = decode_readdir(&xdr, rqstp, res);
3641out:
3642 return status;
3643}
3644
3645/*
3646 * Decode Read response
3647 */
3648static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_readres *res)
3649{
3650 struct xdr_stream xdr;
3651 struct compound_hdr hdr;
3652 int status;
3653
3654 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3655 status = decode_compound_hdr(&xdr, &hdr);
3656 if (status)
3657 goto out;
3658 status = decode_putfh(&xdr);
3659 if (status)
3660 goto out;
3661 status = decode_read(&xdr, rqstp, res);
3662 if (!status)
3663 status = res->count;
3664out:
3665 return status;
3666}
3667
3668/*
3669 * Decode WRITE response
3670 */
3671static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
3672{
3673 struct xdr_stream xdr;
3674 struct compound_hdr hdr;
3675 int status;
3676
3677 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3678 status = decode_compound_hdr(&xdr, &hdr);
3679 if (status)
3680 goto out;
3681 status = decode_putfh(&xdr);
3682 if (status)
3683 goto out;
3684 status = decode_write(&xdr, res);
3685 if (!status)
3686 status = res->count;
3687out:
3688 return status;
3689}
3690
3691/*
3692 * Decode COMMIT response
3693 */
3694static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
3695{
3696 struct xdr_stream xdr;
3697 struct compound_hdr hdr;
3698 int status;
3699
3700 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3701 status = decode_compound_hdr(&xdr, &hdr);
3702 if (status)
3703 goto out;
3704 status = decode_putfh(&xdr);
3705 if (status)
3706 goto out;
3707 status = decode_commit(&xdr, res);
3708out:
3709 return status;
3710}
3711
3712/*
3713 * FSINFO request
3714 */
3715static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
3716{
3717 struct xdr_stream xdr;
3718 struct compound_hdr hdr;
3719 int status;
3720
3721 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3722 status = decode_compound_hdr(&xdr, &hdr);
3723 if (!status)
3724 status = decode_putfh(&xdr);
3725 if (!status)
3726 status = decode_fsinfo(&xdr, fsinfo);
3727 if (!status)
3728 status = -nfs_stat_to_errno(hdr.status);
3729 return status;
3730}
3731
3732/*
3733 * PATHCONF request
3734 */
3735static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf)
3736{
3737 struct xdr_stream xdr;
3738 struct compound_hdr hdr;
3739 int status;
3740
3741 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3742 status = decode_compound_hdr(&xdr, &hdr);
3743 if (!status)
3744 status = decode_putfh(&xdr);
3745 if (!status)
3746 status = decode_pathconf(&xdr, pathconf);
3747 return status;
3748}
3749
3750/*
3751 * STATFS request
3752 */
3753static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat)
3754{
3755 struct xdr_stream xdr;
3756 struct compound_hdr hdr;
3757 int status;
3758
3759 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3760 status = decode_compound_hdr(&xdr, &hdr);
3761 if (!status)
3762 status = decode_putfh(&xdr);
3763 if (!status)
3764 status = decode_statfs(&xdr, fsstat);
3765 return status;
3766}
3767
3768/*
3769 * GETATTR_BITMAP request
3770 */
3771static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, uint32_t *p, struct nfs4_server_caps_res *res)
3772{
3773 struct xdr_stream xdr;
3774 struct compound_hdr hdr;
3775 int status;
3776
3777 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3778 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3779 goto out;
3780 if ((status = decode_putfh(&xdr)) != 0)
3781 goto out;
3782 status = decode_server_caps(&xdr, res);
3783out:
3784 return status;
3785}
3786
3787/*
3788 * Decode RENEW response
3789 */
3790static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
3791{
3792 struct xdr_stream xdr;
3793 struct compound_hdr hdr;
3794 int status;
3795
3796 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3797 status = decode_compound_hdr(&xdr, &hdr);
3798 if (!status)
3799 status = decode_renew(&xdr);
3800 return status;
3801}
3802
3803/*
3804 * a SETCLIENTID request
3805 */
3806static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
3807 struct nfs4_client *clp)
3808{
3809 struct xdr_stream xdr;
3810 struct compound_hdr hdr;
3811 int status;
3812
3813 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3814 status = decode_compound_hdr(&xdr, &hdr);
3815 if (!status)
3816 status = decode_setclientid(&xdr, clp);
3817 if (!status)
3818 status = -nfs_stat_to_errno(hdr.status);
3819 return status;
3820}
3821
3822/*
3823 * a SETCLIENTID_CONFIRM request
3824 */
3825static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
3826{
3827 struct xdr_stream xdr;
3828 struct compound_hdr hdr;
3829 int status;
3830
3831 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
3832 status = decode_compound_hdr(&xdr, &hdr);
3833 if (!status)
3834 status = decode_setclientid_confirm(&xdr);
3835 if (!status)
3836 status = decode_putrootfh(&xdr);
3837 if (!status)
3838 status = decode_fsinfo(&xdr, fsinfo);
3839 if (!status)
3840 status = -nfs_stat_to_errno(hdr.status);
3841 return status;
3842}
3843
3844/*
3845 * DELEGRETURN request
3846 */
3847static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
3848{
3849 struct xdr_stream xdr;
3850 struct compound_hdr hdr;
3851 int status;
3852
3853 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3854 status = decode_compound_hdr(&xdr, &hdr);
3855 if (status == 0) {
3856 status = decode_putfh(&xdr);
3857 if (status == 0)
3858 status = decode_delegreturn(&xdr);
3859 }
3860 return status;
3861}
3862
3863uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
3864{
3865 uint32_t bitmap[2] = {0};
3866 uint32_t len;
3867
3868 if (!*p++) {
3869 if (!*p)
3870 return ERR_PTR(-EAGAIN);
3871 entry->eof = 1;
3872 return ERR_PTR(-EBADCOOKIE);
3873 }
3874
3875 entry->prev_cookie = entry->cookie;
3876 p = xdr_decode_hyper(p, &entry->cookie);
3877 entry->len = ntohl(*p++);
3878 entry->name = (const char *) p;
3879 p += XDR_QUADLEN(entry->len);
3880
3881 /*
3882 * In case the server doesn't return an inode number,
3883 * we fake one here. (We don't use inode number 0,
3884 * since glibc seems to choke on it...)
3885 */
3886 entry->ino = 1;
3887
3888 len = ntohl(*p++); /* bitmap length */
3889 if (len-- > 0) {
3890 bitmap[0] = ntohl(*p++);
3891 if (len-- > 0) {
3892 bitmap[1] = ntohl(*p++);
3893 p += len;
3894 }
3895 }
3896 len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */
3897 if (len > 0) {
3898 if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
3899 xdr_decode_hyper(p, &entry->ino);
3900 else if (bitmap[0] == FATTR4_WORD0_FILEID)
3901 xdr_decode_hyper(p, &entry->ino);
3902 p += len;
3903 }
3904
3905 entry->eof = !p[0] && p[1];
3906 return p;
3907}
3908
3909/*
3910 * We need to translate between nfs status return values and
3911 * the local errno values which may not be the same.
3912 */
3913static struct {
3914 int stat;
3915 int errno;
3916} nfs_errtbl[] = {
3917 { NFS4_OK, 0 },
3918 { NFS4ERR_PERM, EPERM },
3919 { NFS4ERR_NOENT, ENOENT },
3920 { NFS4ERR_IO, errno_NFSERR_IO },
3921 { NFS4ERR_NXIO, ENXIO },
3922 { NFS4ERR_ACCESS, EACCES },
3923 { NFS4ERR_EXIST, EEXIST },
3924 { NFS4ERR_XDEV, EXDEV },
3925 { NFS4ERR_NOTDIR, ENOTDIR },
3926 { NFS4ERR_ISDIR, EISDIR },
3927 { NFS4ERR_INVAL, EINVAL },
3928 { NFS4ERR_FBIG, EFBIG },
3929 { NFS4ERR_NOSPC, ENOSPC },
3930 { NFS4ERR_ROFS, EROFS },
3931 { NFS4ERR_MLINK, EMLINK },
3932 { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
3933 { NFS4ERR_NOTEMPTY, ENOTEMPTY },
3934 { NFS4ERR_DQUOT, EDQUOT },
3935 { NFS4ERR_STALE, ESTALE },
3936 { NFS4ERR_BADHANDLE, EBADHANDLE },
3937 { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
3938 { NFS4ERR_NOTSUPP, ENOTSUPP },
3939 { NFS4ERR_TOOSMALL, ETOOSMALL },
3940 { NFS4ERR_SERVERFAULT, ESERVERFAULT },
3941 { NFS4ERR_BADTYPE, EBADTYPE },
3942 { NFS4ERR_LOCKED, EAGAIN },
3943 { NFS4ERR_RESOURCE, EREMOTEIO },
3944 { NFS4ERR_SYMLINK, ELOOP },
3945 { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
3946 { NFS4ERR_DEADLOCK, EDEADLK },
3947 { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs
3948 * to be handled by a
3949 * middle-layer.
3950 */
3951 { -1, EIO }
3952};
3953
3954/*
3955 * Convert an NFS error code to a local one.
3956 * This one is used jointly by NFSv2 and NFSv3.
3957 */
3958static int
3959nfs_stat_to_errno(int stat)
3960{
3961 int i;
3962 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
3963 if (nfs_errtbl[i].stat == stat)
3964 return nfs_errtbl[i].errno;
3965 }
3966 if (stat <= 10000 || stat > 10100) {
3967 /* The server is looney tunes. */
3968 return ESERVERFAULT;
3969 }
3970 /* If we cannot translate the error, the recovery routines should
3971 * handle it.
3972 * Note: remaining NFSv4 error codes have values > 10000, so should
3973 * not conflict with native Linux error codes.
3974 */
3975 return stat;
3976}
3977
/*
 * MAX(a, b): larger of two values.  Each argument may be evaluated
 * twice, so only pass expressions without side effects.
 */
3978#ifndef MAX
3979# define MAX(a, b) (((a) > (b))? (a) : (b))
3980#endif
3981
/*
 * PROC(proc, argtype, restype): build the nfs4_procedures[] slot at
 * index NFSPROC4_CLNT_<proc>.  Every slot uses the COMPOUND RPC
 * procedure; the encoder is nfs4_xdr_<argtype> and the decoder is
 * nfs4_xdr_<restype>.  The buffer size is the larger of
 * NFS4_<argtype>_sz and NFS4_<restype>_sz shifted left by two, i.e.
 * multiplied by 4 (presumably converting 32-bit XDR words to bytes --
 * confirm against the NFS4_*_sz definitions).
 */
3982#define PROC(proc, argtype, restype) \
3983[NFSPROC4_CLNT_##proc] = { \
3984 .p_proc = NFSPROC4_COMPOUND, \
3985 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
3986 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
3987 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
3988 }
3989
/*
 * RPC procedure table for the NFSv4 client.  Each PROC() line fills
 * the slot indexed by NFSPROC4_CLNT_<name> with the matching
 * nfs4_xdr_enc_* encoder and nfs4_xdr_dec_* decoder.  Because the
 * slots are written with designated initializers, the order of the
 * lines below does not determine the index of an entry.
 */
3990struct rpc_procinfo nfs4_procedures[] = {
3991 PROC(READ, enc_read, dec_read),
3992 PROC(WRITE, enc_write, dec_write),
3993 PROC(COMMIT, enc_commit, dec_commit),
3994 PROC(OPEN, enc_open, dec_open),
3995 PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
3996 PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
3997 PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
3998 PROC(CLOSE, enc_close, dec_close),
3999 PROC(SETATTR, enc_setattr, dec_setattr),
4000 PROC(FSINFO, enc_fsinfo, dec_fsinfo),
4001 PROC(RENEW, enc_renew, dec_renew),
4002 PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
4003 PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
4004 PROC(LOCK, enc_lock, dec_lock),
4005 PROC(LOCKT, enc_lockt, dec_lockt),
4006 PROC(LOCKU, enc_locku, dec_locku),
4007 PROC(ACCESS, enc_access, dec_access),
4008 PROC(GETATTR, enc_getattr, dec_getattr),
4009 PROC(LOOKUP, enc_lookup, dec_lookup),
4010 PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
4011 PROC(REMOVE, enc_remove, dec_remove),
4012 PROC(RENAME, enc_rename, dec_rename),
4013 PROC(LINK, enc_link, dec_link),
4014 PROC(SYMLINK, enc_symlink, dec_symlink),
4015 PROC(CREATE, enc_create, dec_create),
4016 PROC(PATHCONF, enc_pathconf, dec_pathconf),
4017 PROC(STATFS, enc_statfs, dec_statfs),
4018 PROC(READLINK, enc_readlink, dec_readlink),
4019 PROC(READDIR, enc_readdir, dec_readdir),
4020 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
4021 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
4022};
4023
/*
 * RPC version descriptor for NFS version 4: points at
 * nfs4_procedures[] and records how many slots that array has
 * (computed from the array size, so it follows the table).
 */
4024struct rpc_version nfs_version4 = {
4025 .number = 4,
4026 .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
4027 .procs = nfs4_procedures
4028};
4029
4030/*
4031 * Local variables:
4032 * c-basic-offset: 8
4033 * End:
4034 */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
new file mode 100644
index 000000000000..fd5bc596fe8a
--- /dev/null
+++ b/fs/nfs/nfsroot.c
@@ -0,0 +1,513 @@
1/*
2 * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
3 *
4 * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de>
5 *
6 * Allow an NFS filesystem to be mounted as root. The way this works is:
7 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
8 * (2) Handle RPC negotiation with the system which replied to RARP or
9 * was reported as a boot server by BOOTP or manually.
10 * (3) The actual mounting is done later, when init() is running.
11 *
12 *
13 * Changes:
14 *
15 * Alan Cox : Removed get_address name clash with FPU.
16 * Alan Cox : Reformatted a bit.
17 * Gero Kuhlmann : Code cleanup
18 * Michael Rausch : Fixed recognition of an incoming RARP answer.
19 * Martin Mares : (2.0) Auto-configuration via BOOTP supported.
20 * Martin Mares : Manual selection of interface & BOOTP/RARP.
21 * Martin Mares : Using network routes instead of host routes,
22 * allowing the default configuration to be used
23 * for normal operation of the host.
24 * Martin Mares : Randomized timer with exponential backoff
25 * installed to minimize network congestion.
26 * Martin Mares : Code cleanup.
27 * Martin Mares : (2.1) BOOTP and RARP made configuration options.
28 * Martin Mares : Server hostname generation fixed.
29 * Gerd Knorr : Fixed wired inode handling
30 * Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored.
31 * Martin Mares : RARP replies not tested for server address.
32 * Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please
33 * send me your new patches _before_ bothering
34 * Linus so that I don't always have to cleanup
35 * _afterwards_ - thanks)
36 * Gero Kuhlmann : Last changes of Martin Mares undone.
37 * Gero Kuhlmann : RARP replies are tested for specified server
38 * again. However, it's now possible to have
39 * different RARP and NFS servers.
40 * Gero Kuhlmann : "0.0.0.0" addresses from command line are
41 * now mapped to INADDR_NONE.
42 * Gero Kuhlmann : Fixed a bug which prevented BOOTP path name
43 * from being used (thanks to Leo Spiekman)
44 * Andy Walker : Allow to specify the NFS server in nfs_root
45 * without giving a path name
46 * Swen Thümmler : Allow to specify the NFS options in nfs_root
47 * without giving a path name. Fix BOOTP request
48 * for domainname (domainname is NIS domain, not
49 * DNS domain!). Skip dummy devices for BOOTP.
50 * Jacek Zapala : Fixed a bug which prevented server-ip address
51 * from nfsroot parameter from being used.
52 * Olaf Kirch : Adapted to new NFS code.
53 * Jakub Jelinek : Free used code segment.
54 * Marko Kohtala : Fixed some bugs.
55 * Martin Mares : Debug message cleanup
56 * Martin Mares : Changed to use the new generic IP layer autoconfig
57 * code. BOOTP and RARP moved there.
58 * Martin Mares : Default path now contains host name instead of
59 * host IP address (but host name defaults to IP
60 * address anyway).
61 * Martin Mares : Use root_server_addr appropriately during setup.
62 * Martin Mares : Rewrote parameter parsing, now hopefully giving
63 * correct overriding.
64 * Trond Myklebust : Add in preliminary support for NFSv3 and TCP.
65 * Fix bug in root_nfs_addr(). nfs_data.namlen
66 * is NOT for the length of the hostname.
67 * Hua Qin : Support for mounting root file system via
68 * NFS over TCP.
69 * Fabian Frederick: Option parser rebuilt (using parser lib)
70*/
71
72#include <linux/config.h>
73#include <linux/types.h>
74#include <linux/string.h>
75#include <linux/kernel.h>
76#include <linux/time.h>
77#include <linux/fs.h>
78#include <linux/init.h>
79#include <linux/sunrpc/clnt.h>
80#include <linux/nfs.h>
81#include <linux/nfs_fs.h>
82#include <linux/nfs_mount.h>
83#include <linux/in.h>
84#include <linux/major.h>
85#include <linux/utsname.h>
86#include <linux/inet.h>
87#include <linux/root_dev.h>
88#include <net/ipconfig.h>
89#include <linux/parser.h>
90
91/* Define this to allow debugging output */
92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT
94
95/* Default path we try to mount. "%s" gets replaced by our host name (which defaults to our IP address) */
96#define NFS_ROOT "/tftpboot/%s"
97
98/* Parameters passed from the kernel command line */
/* Filled in by nfs_root_setup() when "nfsroot=" is given. */
99static char nfs_root_name[256] __initdata = "";
100
101/* Address of NFS server */
/* Copied from root_server_addr by root_nfs_addr(). */
102static __u32 servaddr __initdata = 0;
103
104/* Name of directory to mount */
/* Built by root_nfs_name() from the parsed path and the node name. */
105static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
106
107/* NFS-related data */
/*
 * nfs_port is reset to -1 by root_nfs_name(); while it is still
 * negative, root_nfs_ports() asks the server's portmapper for it.
 * mount_port is always set by root_nfs_ports().
 */
108static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
109static int nfs_port __initdata = 0; /* Port to connect to for NFS */
110static int mount_port __initdata = 0; /* Mount daemon port number */
111
112
113/***************************************************************************
114
115 Parsing of options
116
117 ***************************************************************************/
118
/*
 * Token identifiers for the nfsroot option parser.
 *
 * Ordering matters: root_nfs_parse() treats every token numerically
 * below Opt_soft as taking an integer argument, so integer options
 * must stay grouped before Opt_soft and all others after it.
 */
119enum {
120 /* Options that take integer arguments */
121 Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
122 Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
123 /* Options that take no arguments */
124 Opt_soft, Opt_hard, Opt_intr,
125 Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
126 Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
127 /* Error token */
128 Opt_err
129};
130
/*
 * Pattern table handed to match_token() by root_nfs_parse().  Several
 * spellings map to the same token (e.g. "nfsvers=3" and "v3", or
 * "proto=tcp" and "tcp").  The {Opt_err, NULL} entry ends the table.
 */
131static match_table_t __initdata tokens = {
132 {Opt_port, "port=%u"},
133 {Opt_rsize, "rsize=%u"},
134 {Opt_wsize, "wsize=%u"},
135 {Opt_timeo, "timeo=%u"},
136 {Opt_retrans, "retrans=%u"},
137 {Opt_acregmin, "acregmin=%u"},
138 {Opt_acregmax, "acregmax=%u"},
139 {Opt_acdirmin, "acdirmin=%u"},
140 {Opt_acdirmax, "acdirmax=%u"},
141 {Opt_soft, "soft"},
142 {Opt_hard, "hard"},
143 {Opt_intr, "intr"},
144 {Opt_nointr, "nointr"},
145 {Opt_posix, "posix"},
146 {Opt_noposix, "noposix"},
147 {Opt_cto, "cto"},
148 {Opt_nocto, "nocto"},
149 {Opt_ac, "ac"},
150 {Opt_noac, "noac"},
151 {Opt_lock, "lock"},
152 {Opt_nolock, "nolock"},
153 {Opt_v2, "nfsvers=2"},
154 {Opt_v2, "v2"},
155 {Opt_v3, "nfsvers=3"},
156 {Opt_v3, "v3"},
157 {Opt_udp, "proto=udp"},
158 {Opt_udp, "udp"},
159 {Opt_tcp, "proto=tcp"},
160 {Opt_tcp, "tcp"},
161 {Opt_err, NULL}
162
163};
164
165/*
166 * Parse option string.
167 */
168
169static int __init root_nfs_parse(char *name, char *buf)
170{
171
172 char *p;
173 substring_t args[MAX_OPT_ARGS];
174 int option;
175
176 if (!name)
177 return 1;
178
179 /* Set the NFS remote path */
180 p = strsep(&name, ",");
181 if (p[0] != '\0' && strcmp(p, "default") != 0)
182 strlcpy(buf, p, NFS_MAXPATHLEN);
183
184 while ((p = strsep (&name, ",")) != NULL) {
185 int token;
186 if (!*p)
187 continue;
188 token = match_token(p, tokens, args);
189
190 /* %u tokens only. Beware if you add new tokens! */
191 if (token < Opt_soft && match_int(&args[0], &option))
192 return 0;
193 switch (token) {
194 case Opt_port:
195 nfs_port = option;
196 break;
197 case Opt_rsize:
198 nfs_data.rsize = option;
199 break;
200 case Opt_wsize:
201 nfs_data.wsize = option;
202 break;
203 case Opt_timeo:
204 nfs_data.timeo = option;
205 break;
206 case Opt_retrans:
207 nfs_data.retrans = option;
208 break;
209 case Opt_acregmin:
210 nfs_data.acregmin = option;
211 break;
212 case Opt_acregmax:
213 nfs_data.acregmax = option;
214 break;
215 case Opt_acdirmin:
216 nfs_data.acdirmin = option;
217 break;
218 case Opt_acdirmax:
219 nfs_data.acdirmax = option;
220 break;
221 case Opt_soft:
222 nfs_data.flags |= NFS_MOUNT_SOFT;
223 break;
224 case Opt_hard:
225 nfs_data.flags &= ~NFS_MOUNT_SOFT;
226 break;
227 case Opt_intr:
228 nfs_data.flags |= NFS_MOUNT_INTR;
229 break;
230 case Opt_nointr:
231 nfs_data.flags &= ~NFS_MOUNT_INTR;
232 break;
233 case Opt_posix:
234 nfs_data.flags |= NFS_MOUNT_POSIX;
235 break;
236 case Opt_noposix:
237 nfs_data.flags &= ~NFS_MOUNT_POSIX;
238 break;
239 case Opt_cto:
240 nfs_data.flags &= ~NFS_MOUNT_NOCTO;
241 break;
242 case Opt_nocto:
243 nfs_data.flags |= NFS_MOUNT_NOCTO;
244 break;
245 case Opt_ac:
246 nfs_data.flags &= ~NFS_MOUNT_NOAC;
247 break;
248 case Opt_noac:
249 nfs_data.flags |= NFS_MOUNT_NOAC;
250 break;
251 case Opt_lock:
252 nfs_data.flags &= ~NFS_MOUNT_NONLM;
253 break;
254 case Opt_nolock:
255 nfs_data.flags |= NFS_MOUNT_NONLM;
256 break;
257 case Opt_v2:
258 nfs_data.flags &= ~NFS_MOUNT_VER3;
259 break;
260 case Opt_v3:
261 nfs_data.flags |= NFS_MOUNT_VER3;
262 break;
263 case Opt_udp:
264 nfs_data.flags &= ~NFS_MOUNT_TCP;
265 break;
266 case Opt_tcp:
267 nfs_data.flags |= NFS_MOUNT_TCP;
268 break;
269 default :
270 return 0;
271 }
272 }
273
274 return 1;
275}
276
277/*
278 * Prepare the NFS data structure and parse all options.
279 */
280static int __init root_nfs_name(char *name)
281{
282 static char buf[NFS_MAXPATHLEN] __initdata;
283 char *cp;
284
285 /* Set some default values */
286 memset(&nfs_data, 0, sizeof(nfs_data));
287 nfs_port = -1;
288 nfs_data.version = NFS_MOUNT_VERSION;
289 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */
290 nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
291 nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
292 nfs_data.acregmin = 3;
293 nfs_data.acregmax = 60;
294 nfs_data.acdirmin = 30;
295 nfs_data.acdirmax = 60;
296 strcpy(buf, NFS_ROOT);
297
298 /* Process options received from the remote server */
299 root_nfs_parse(root_server_path, buf);
300
301 /* Override them by options set on kernel command-line */
302 root_nfs_parse(name, buf);
303
304 cp = system_utsname.nodename;
305 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
306 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
307 return -1;
308 }
309 sprintf(nfs_path, buf, cp);
310
311 return 1;
312}
313
314
315/*
316 * Get NFS server address.
317 */
318static int __init root_nfs_addr(void)
319{
320 if ((servaddr = root_server_addr) == INADDR_NONE) {
321 printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
322 return -1;
323 }
324
325 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
326 "%u.%u.%u.%u", NIPQUAD(servaddr));
327 return 0;
328}
329
/*
 *  Tell the user what's going on.
 *
 *  Only compiled when NFSROOT_DEBUG is defined.  Logs the mount path,
 *  server name, transfer sizes, timeouts, attribute-cache limits,
 *  port numbers and mount flags currently held in the file-scope
 *  state.
 */
#ifdef NFSROOT_DEBUG
static void __init root_nfs_print(void)
{
	printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
	       nfs_path,
	       nfs_data.hostname);

	printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
	       nfs_data.rsize,
	       nfs_data.wsize,
	       nfs_data.timeo,
	       nfs_data.retrans);

	printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
	       nfs_data.acregmin,
	       nfs_data.acregmax,
	       nfs_data.acdirmin,
	       nfs_data.acdirmax);

	printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
	       nfs_port,
	       mount_port,
	       nfs_data.flags);
}
#endif
347
348
349static int __init root_nfs_init(void)
350{
351#ifdef NFSROOT_DEBUG
352 nfs_debug |= NFSDBG_ROOT;
353#endif
354
355 /*
356 * Decode the root directory path name and NFS options from
357 * the kernel command line. This has to go here in order to
358 * be able to use the client IP address for the remote root
359 * directory (necessary for pure RARP booting).
360 */
361 if (root_nfs_name(nfs_root_name) < 0 ||
362 root_nfs_addr() < 0)
363 return -1;
364
365#ifdef NFSROOT_DEBUG
366 root_nfs_print();
367#endif
368
369 return 0;
370}
371
372
373/*
374 * Parse NFS server and directory information passed on the kernel
375 * command line.
376 */
377static int __init nfs_root_setup(char *line)
378{
379 ROOT_DEV = Root_NFS;
380 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
381 strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
382 } else {
383 int n = strlen(line) + sizeof(NFS_ROOT) - 1;
384 if (n >= sizeof(nfs_root_name))
385 line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
386 sprintf(nfs_root_name, NFS_ROOT, line);
387 }
388 root_server_addr = root_nfs_parse_addr(nfs_root_name);
389 return 1;
390}
391
392__setup("nfsroot=", nfs_root_setup);
393
394/***************************************************************************
395
396 Routines to actually mount the root directory
397
398 ***************************************************************************/
399
/*
 *  Construct sockaddr_in from address and port number.
 *
 *  @sin:  structure to fill in; it is zeroed first so that the padding
 *         bytes are well defined even when the caller passes an
 *         uninitialised stack variable.
 *  @addr: stored into sin_addr.s_addr unchanged.
 *  @port: stored into sin_port unchanged, so the caller must supply it
 *         in network byte order.
 */
static inline void
set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
{
	memset(sin, 0, sizeof(*sin));
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = addr;
	sin->sin_port = port;
}
410
411/*
412 * Query server portmapper for the port of a daemon program.
413 */
414static int __init root_nfs_getport(int program, int version, int proto)
415{
416 struct sockaddr_in sin;
417
418 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
419 program, version, NIPQUAD(servaddr));
420 set_sockaddr(&sin, servaddr, 0);
421 return rpc_getport_external(&sin, program, version, proto);
422}
423
424
425/*
426 * Use portmapper to find mountd and nfsd port numbers if not overriden
427 * by the user. Use defaults if portmapper is not available.
428 * XXX: Is there any nfs server with no portmapper?
429 */
430static int __init root_nfs_ports(void)
431{
432 int port;
433 int nfsd_ver, mountd_ver;
434 int nfsd_port, mountd_port;
435 int proto;
436
437 if (nfs_data.flags & NFS_MOUNT_VER3) {
438 nfsd_ver = NFS3_VERSION;
439 mountd_ver = NFS_MNT3_VERSION;
440 nfsd_port = NFS_PORT;
441 mountd_port = NFS_MNT_PORT;
442 } else {
443 nfsd_ver = NFS2_VERSION;
444 mountd_ver = NFS_MNT_VERSION;
445 nfsd_port = NFS_PORT;
446 mountd_port = NFS_MNT_PORT;
447 }
448
449 proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
450
451 if (nfs_port < 0) {
452 if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
453 printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
454 "number from server, using default\n");
455 port = nfsd_port;
456 }
457 nfs_port = htons(port);
458 dprintk("Root-NFS: Portmapper on server returned %d "
459 "as nfsd port\n", port);
460 }
461
462 if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
463 printk(KERN_ERR "Root-NFS: Unable to get mountd port "
464 "number from server, using default\n");
465 port = mountd_port;
466 }
467 mount_port = htons(port);
468 dprintk("Root-NFS: mountd port is %d\n", port);
469
470 return 0;
471}
472
473
474/*
475 * Get a file handle from the server for the directory which is to be
476 * mounted.
477 */
478static int __init root_nfs_get_handle(void)
479{
480 struct nfs_fh fh;
481 struct sockaddr_in sin;
482 int status;
483 int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
484 IPPROTO_TCP : IPPROTO_UDP;
485 int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
486 NFS_MNT3_VERSION : NFS_MNT_VERSION;
487
488 set_sockaddr(&sin, servaddr, mount_port);
489 status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
490 if (status < 0)
491 printk(KERN_ERR "Root-NFS: Server returned error %d "
492 "while mounting %s\n", status, nfs_path);
493 else {
494 nfs_data.root.size = fh.size;
495 memcpy(nfs_data.root.data, fh.data, fh.size);
496 }
497
498 return status;
499}
500
501/*
502 * Get the NFS port numbers and file handle, and return the prepared 'data'
503 * argument for mount() if everything went OK. Return NULL otherwise.
504 */
505void * __init nfs_root_data(void)
506{
507 if (root_nfs_init() < 0
508 || root_nfs_ports() < 0
509 || root_nfs_get_handle() < 0)
510 return NULL;
511 set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
512 return (void*)&nfs_data;
513}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
new file mode 100644
index 000000000000..4f1ba723848d
--- /dev/null
+++ b/fs/nfs/pagelist.c
@@ -0,0 +1,309 @@
1/*
2 * linux/fs/nfs/pagelist.c
3 *
4 * A set of helper functions for managing NFS read and write requests.
5 * The main purpose of these routines is to provide support for the
6 * coalescing of several requests into a single RPC call.
7 *
8 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
9 *
10 */
11
12#include <linux/config.h>
13#include <linux/slab.h>
14#include <linux/file.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/nfs3.h>
17#include <linux/nfs4.h>
18#include <linux/nfs_page.h>
19#include <linux/nfs_fs.h>
20#include <linux/nfs_mount.h>
21
22#define NFS_PARANOIA 1
23
24static kmem_cache_t *nfs_page_cachep;
25
26static inline struct nfs_page *
27nfs_page_alloc(void)
28{
29 struct nfs_page *p;
30 p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
31 if (p) {
32 memset(p, 0, sizeof(*p));
33 INIT_LIST_HEAD(&p->wb_list);
34 }
35 return p;
36}
37
38static inline void
39nfs_page_free(struct nfs_page *p)
40{
41 kmem_cache_free(nfs_page_cachep, p);
42}
43
44/**
45 * nfs_create_request - Create an NFS read/write request.
46 * @file: file descriptor to use
47 * @inode: inode to which the request is attached
48 * @page: page to write
49 * @offset: starting offset within the page for the write
50 * @count: number of bytes to read/write
51 *
52 * The page must be locked by the caller. This makes sure we never
53 * create two different requests for the same page, and avoids
54 * a possible deadlock when we reach the hard limit on the number
55 * of dirty pages.
56 * User should ensure it is safe to sleep in this function.
57 */
58struct nfs_page *
59nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
60 struct page *page,
61 unsigned int offset, unsigned int count)
62{
63 struct nfs_server *server = NFS_SERVER(inode);
64 struct nfs_page *req;
65
66 /* Deal with hard limits. */
67 for (;;) {
68 /* try to allocate the request struct */
69 req = nfs_page_alloc();
70 if (req != NULL)
71 break;
72
73 /* Try to free up at least one request in order to stay
74 * below the hard limit
75 */
76 if (signalled() && (server->flags & NFS_MOUNT_INTR))
77 return ERR_PTR(-ERESTARTSYS);
78 yield();
79 }
80
81 /* Initialize the request struct. Initially, we assume a
82 * long write-back delay. This will be adjusted in
83 * update_nfs_request below if the region is not locked. */
84 req->wb_page = page;
85 atomic_set(&req->wb_complete, 0);
86 req->wb_index = page->index;
87 page_cache_get(page);
88 req->wb_offset = offset;
89 req->wb_pgbase = offset;
90 req->wb_bytes = count;
91 atomic_set(&req->wb_count, 1);
92 req->wb_context = get_nfs_open_context(ctx);
93
94 return req;
95}
96
97/**
98 * nfs_unlock_request - Unlock request and wake up sleepers.
99 * @req:
100 */
101void nfs_unlock_request(struct nfs_page *req)
102{
103 if (!NFS_WBACK_BUSY(req)) {
104 printk(KERN_ERR "NFS: Invalid unlock attempted\n");
105 BUG();
106 }
107 smp_mb__before_clear_bit();
108 clear_bit(PG_BUSY, &req->wb_flags);
109 smp_mb__after_clear_bit();
110 wake_up_all(&req->wb_context->waitq);
111 nfs_release_request(req);
112}
113
114/**
115 * nfs_clear_request - Free up all resources allocated to the request
116 * @req:
117 *
118 * Release page resources associated with a write request after it
119 * has completed.
120 */
121void nfs_clear_request(struct nfs_page *req)
122{
123 if (req->wb_page) {
124 page_cache_release(req->wb_page);
125 req->wb_page = NULL;
126 }
127}
128
129
130/**
131 * nfs_release_request - Release the count on an NFS read/write request
132 * @req: request to release
133 *
134 * Note: Should never be called with the spinlock held!
135 */
136void
137nfs_release_request(struct nfs_page *req)
138{
139 if (!atomic_dec_and_test(&req->wb_count))
140 return;
141
142#ifdef NFS_PARANOIA
143 BUG_ON (!list_empty(&req->wb_list));
144 BUG_ON (NFS_WBACK_BUSY(req));
145#endif
146
147 /* Release struct file or cached credential */
148 nfs_clear_request(req);
149 put_nfs_open_context(req->wb_context);
150 nfs_page_free(req);
151}
152
153/**
154 * nfs_list_add_request - Insert a request into a sorted list
155 * @req: request
156 * @head: head of list into which to insert the request.
157 *
158 * Note that the wb_list is sorted by page index in order to facilitate
159 * coalescing of requests.
160 * We use an insertion sort that is optimized for the case of appended
161 * writes.
162 */
163void
164nfs_list_add_request(struct nfs_page *req, struct list_head *head)
165{
166 struct list_head *pos;
167
168#ifdef NFS_PARANOIA
169 if (!list_empty(&req->wb_list)) {
170 printk(KERN_ERR "NFS: Add to list failed!\n");
171 BUG();
172 }
173#endif
174 list_for_each_prev(pos, head) {
175 struct nfs_page *p = nfs_list_entry(pos);
176 if (p->wb_index < req->wb_index)
177 break;
178 }
179 list_add(&req->wb_list, pos);
180 req->wb_list_head = head;
181}
182
183/**
184 * nfs_wait_on_request - Wait for a request to complete.
185 * @req: request to wait upon.
186 *
187 * Interruptible by signals only if mounted with intr flag.
188 * The user is responsible for holding a count on the request.
189 */
190int
191nfs_wait_on_request(struct nfs_page *req)
192{
193 struct inode *inode = req->wb_context->dentry->d_inode;
194 struct rpc_clnt *clnt = NFS_CLIENT(inode);
195
196 if (!NFS_WBACK_BUSY(req))
197 return 0;
198 return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
199}
200
201/**
202 * nfs_coalesce_requests - Split coalesced requests out from a list.
203 * @head: source list
204 * @dst: destination list
205 * @nmax: maximum number of requests to coalesce
206 *
207 * Moves a maximum of 'nmax' elements from one list to another.
208 * The elements are checked to ensure that they form a contiguous set
209 * of pages, and that the RPC credentials are the same.
210 */
211int
212nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
213 unsigned int nmax)
214{
215 struct nfs_page *req = NULL;
216 unsigned int npages = 0;
217
218 while (!list_empty(head)) {
219 struct nfs_page *prev = req;
220
221 req = nfs_list_entry(head->next);
222 if (prev) {
223 if (req->wb_context->cred != prev->wb_context->cred)
224 break;
225 if (req->wb_context->lockowner != prev->wb_context->lockowner)
226 break;
227 if (req->wb_context->state != prev->wb_context->state)
228 break;
229 if (req->wb_index != (prev->wb_index + 1))
230 break;
231
232 if (req->wb_pgbase != 0)
233 break;
234 }
235 nfs_list_remove_request(req);
236 nfs_list_add_request(req, dst);
237 npages++;
238 if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
239 break;
240 if (npages >= nmax)
241 break;
242 }
243 return npages;
244}
245
246/**
247 * nfs_scan_list - Scan a list for matching requests
248 * @head: One of the NFS inode request lists
249 * @dst: Destination list
250 * @idx_start: lower bound of page->index to scan
251 * @npages: idx_start + npages sets the upper bound to scan.
252 *
253 * Moves elements from one of the inode request lists.
254 * If the number of requests is set to 0, the entire address_space
255 * starting at index idx_start, is scanned.
256 * The requests are *not* checked to ensure that they form a contiguous set.
257 * You must be holding the inode's req_lock when calling this function
258 */
259int
260nfs_scan_list(struct list_head *head, struct list_head *dst,
261 unsigned long idx_start, unsigned int npages)
262{
263 struct list_head *pos, *tmp;
264 struct nfs_page *req;
265 unsigned long idx_end;
266 int res;
267
268 res = 0;
269 if (npages == 0)
270 idx_end = ~0;
271 else
272 idx_end = idx_start + npages - 1;
273
274 list_for_each_safe(pos, tmp, head) {
275
276 req = nfs_list_entry(pos);
277
278 if (req->wb_index < idx_start)
279 continue;
280 if (req->wb_index > idx_end)
281 break;
282
283 if (!nfs_lock_request(req))
284 continue;
285 nfs_list_remove_request(req);
286 nfs_list_add_request(req, dst);
287 res++;
288 }
289 return res;
290}
291
292int nfs_init_nfspagecache(void)
293{
294 nfs_page_cachep = kmem_cache_create("nfs_page",
295 sizeof(struct nfs_page),
296 0, SLAB_HWCACHE_ALIGN,
297 NULL, NULL);
298 if (nfs_page_cachep == NULL)
299 return -ENOMEM;
300
301 return 0;
302}
303
304void nfs_destroy_nfspagecache(void)
305{
306 if (kmem_cache_destroy(nfs_page_cachep))
307 printk(KERN_INFO "nfs_page: not all structures were freed\n");
308}
309
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
new file mode 100644
index 000000000000..d31b4d6e5a5e
--- /dev/null
+++ b/fs/nfs/proc.c
@@ -0,0 +1,655 @@
1/*
2 * linux/fs/nfs/proc.c
3 *
4 * Copyright (C) 1992, 1993, 1994 Rick Sladkey
5 *
6 * OS-independent nfs remote procedure call functions
7 *
8 * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
9 * so at last we can have decent(ish) throughput off a
10 * Sun server.
11 *
12 * Coding optimized and cleaned up by Florian La Roche.
13 * Note: Error returns are optimized for NFS_OK, which isn't translated via
14 * nfs_stat_to_errno(), but happens to be already the right return code.
15 *
16 * Also, the code currently doesn't check the size of the packet, when
17 * it decodes the packet.
18 *
19 * Feel free to fix it and mail me the diffs if it worries you.
20 *
21 * Completely rewritten to support the new RPC call interface;
22 * rewrote and moved the entire XDR stuff to xdr.c
23 * --Olaf Kirch June 1996
24 *
25 * The code below initializes all auto variables explicitly, otherwise
26 * it will fail to work as a module (gcc generates a memset call for an
27 * incomplete struct).
28 */
29
30#include <linux/types.h>
31#include <linux/param.h>
32#include <linux/slab.h>
33#include <linux/time.h>
34#include <linux/mm.h>
35#include <linux/utsname.h>
36#include <linux/errno.h>
37#include <linux/string.h>
38#include <linux/in.h>
39#include <linux/pagemap.h>
40#include <linux/sunrpc/clnt.h>
41#include <linux/nfs.h>
42#include <linux/nfs2.h>
43#include <linux/nfs_fs.h>
44#include <linux/nfs_page.h>
45#include <linux/lockd/bind.h>
46#include <linux/smp_lock.h>
47
48#define NFSDBG_FACILITY NFSDBG_PROC
49
50extern struct rpc_procinfo nfs_procedures[];
51
52/*
53 * Bare-bones access to getattr: this is for nfs_read_super.
54 */
55static int
56nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
57 struct nfs_fsinfo *info)
58{
59 struct nfs_fattr *fattr = info->fattr;
60 struct nfs2_fsstat fsinfo;
61 int status;
62
63 dprintk("%s: call getattr\n", __FUNCTION__);
64 fattr->valid = 0;
65 status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
66 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
67 if (status)
68 return status;
69 dprintk("%s: call statfs\n", __FUNCTION__);
70 status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
71 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
72 if (status)
73 return status;
74 info->rtmax = NFS_MAXDATA;
75 info->rtpref = fsinfo.tsize;
76 info->rtmult = fsinfo.bsize;
77 info->wtmax = NFS_MAXDATA;
78 info->wtpref = fsinfo.tsize;
79 info->wtmult = fsinfo.bsize;
80 info->dtpref = fsinfo.tsize;
81 info->maxfilesize = 0x7FFFFFFF;
82 info->lease_time = 0;
83 return 0;
84}
85
86/*
87 * One function for each procedure in the NFS protocol.
88 */
89static int
90nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
91 struct nfs_fattr *fattr)
92{
93 int status;
94
95 dprintk("NFS call getattr\n");
96 fattr->valid = 0;
97 status = rpc_call(server->client, NFSPROC_GETATTR,
98 fhandle, fattr, 0);
99 dprintk("NFS reply getattr: %d\n", status);
100 return status;
101}
102
103static int
104nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
105 struct iattr *sattr)
106{
107 struct inode *inode = dentry->d_inode;
108 struct nfs_sattrargs arg = {
109 .fh = NFS_FH(inode),
110 .sattr = sattr
111 };
112 int status;
113
114 dprintk("NFS call setattr\n");
115 fattr->valid = 0;
116 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
117 dprintk("NFS reply setattr: %d\n", status);
118 return status;
119}
120
121static int
122nfs_proc_lookup(struct inode *dir, struct qstr *name,
123 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
124{
125 struct nfs_diropargs arg = {
126 .fh = NFS_FH(dir),
127 .name = name->name,
128 .len = name->len
129 };
130 struct nfs_diropok res = {
131 .fh = fhandle,
132 .fattr = fattr
133 };
134 int status;
135
136 dprintk("NFS call lookup %s\n", name->name);
137 fattr->valid = 0;
138 status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
139 dprintk("NFS reply lookup: %d\n", status);
140 return status;
141}
142
143static int nfs_proc_readlink(struct inode *inode, struct page *page,
144 unsigned int pgbase, unsigned int pglen)
145{
146 struct nfs_readlinkargs args = {
147 .fh = NFS_FH(inode),
148 .pgbase = pgbase,
149 .pglen = pglen,
150 .pages = &page
151 };
152 int status;
153
154 dprintk("NFS call readlink\n");
155 status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
156 dprintk("NFS reply readlink: %d\n", status);
157 return status;
158}
159
160static int nfs_proc_read(struct nfs_read_data *rdata)
161{
162 int flags = rdata->flags;
163 struct inode * inode = rdata->inode;
164 struct nfs_fattr * fattr = rdata->res.fattr;
165 struct rpc_message msg = {
166 .rpc_proc = &nfs_procedures[NFSPROC_READ],
167 .rpc_argp = &rdata->args,
168 .rpc_resp = &rdata->res,
169 .rpc_cred = rdata->cred,
170 };
171 int status;
172
173 dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
174 (long long) rdata->args.offset);
175 fattr->valid = 0;
176 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
177 if (status >= 0) {
178 nfs_refresh_inode(inode, fattr);
179 /* Emulate the eof flag, which isn't normally needed in NFSv2
180 * as it is guaranteed to always return the file attributes
181 */
182 if (rdata->args.offset + rdata->args.count >= fattr->size)
183 rdata->res.eof = 1;
184 }
185 dprintk("NFS reply read: %d\n", status);
186 return status;
187}
188
189static int nfs_proc_write(struct nfs_write_data *wdata)
190{
191 int flags = wdata->flags;
192 struct inode * inode = wdata->inode;
193 struct nfs_fattr * fattr = wdata->res.fattr;
194 struct rpc_message msg = {
195 .rpc_proc = &nfs_procedures[NFSPROC_WRITE],
196 .rpc_argp = &wdata->args,
197 .rpc_resp = &wdata->res,
198 .rpc_cred = wdata->cred,
199 };
200 int status;
201
202 dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
203 (long long) wdata->args.offset);
204 fattr->valid = 0;
205 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
206 if (status >= 0) {
207 nfs_refresh_inode(inode, fattr);
208 wdata->res.count = wdata->args.count;
209 wdata->verf.committed = NFS_FILE_SYNC;
210 }
211 dprintk("NFS reply write: %d\n", status);
212 return status < 0? status : wdata->res.count;
213}
214
215static int
216nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
217 int flags)
218{
219 struct nfs_fh fhandle;
220 struct nfs_fattr fattr;
221 struct nfs_createargs arg = {
222 .fh = NFS_FH(dir),
223 .name = dentry->d_name.name,
224 .len = dentry->d_name.len,
225 .sattr = sattr
226 };
227 struct nfs_diropok res = {
228 .fh = &fhandle,
229 .fattr = &fattr
230 };
231 int status;
232
233 fattr.valid = 0;
234 dprintk("NFS call create %s\n", dentry->d_name.name);
235 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
236 if (status == 0)
237 status = nfs_instantiate(dentry, &fhandle, &fattr);
238 dprintk("NFS reply create: %d\n", status);
239 return status;
240}
241
242/*
243 * In NFSv2, mknod is grafted onto the create call.
244 */
245static int
246nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
247 dev_t rdev)
248{
249 struct nfs_fh fhandle;
250 struct nfs_fattr fattr;
251 struct nfs_createargs arg = {
252 .fh = NFS_FH(dir),
253 .name = dentry->d_name.name,
254 .len = dentry->d_name.len,
255 .sattr = sattr
256 };
257 struct nfs_diropok res = {
258 .fh = &fhandle,
259 .fattr = &fattr
260 };
261 int status, mode;
262
263 dprintk("NFS call mknod %s\n", dentry->d_name.name);
264
265 mode = sattr->ia_mode;
266 if (S_ISFIFO(mode)) {
267 sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
268 sattr->ia_valid &= ~ATTR_SIZE;
269 } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
270 sattr->ia_valid |= ATTR_SIZE;
271 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
272 }
273
274 fattr.valid = 0;
275 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
276
277 if (status == -EINVAL && S_ISFIFO(mode)) {
278 sattr->ia_mode = mode;
279 fattr.valid = 0;
280 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
281 }
282 if (status == 0)
283 status = nfs_instantiate(dentry, &fhandle, &fattr);
284 dprintk("NFS reply mknod: %d\n", status);
285 return status;
286}
287
288static int
289nfs_proc_remove(struct inode *dir, struct qstr *name)
290{
291 struct nfs_diropargs arg = {
292 .fh = NFS_FH(dir),
293 .name = name->name,
294 .len = name->len
295 };
296 struct rpc_message msg = {
297 .rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
298 .rpc_argp = &arg,
299 .rpc_resp = NULL,
300 .rpc_cred = NULL
301 };
302 int status;
303
304 dprintk("NFS call remove %s\n", name->name);
305 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
306
307 dprintk("NFS reply remove: %d\n", status);
308 return status;
309}
310
311static int
312nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
313{
314 struct nfs_diropargs *arg;
315
316 arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL);
317 if (!arg)
318 return -ENOMEM;
319 arg->fh = NFS_FH(dir->d_inode);
320 arg->name = name->name;
321 arg->len = name->len;
322 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
323 msg->rpc_argp = arg;
324 return 0;
325}
326
327static int
328nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
329{
330 struct rpc_message *msg = &task->tk_msg;
331
332 if (msg->rpc_argp)
333 kfree(msg->rpc_argp);
334 return 0;
335}
336
337static int
338nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
339 struct inode *new_dir, struct qstr *new_name)
340{
341 struct nfs_renameargs arg = {
342 .fromfh = NFS_FH(old_dir),
343 .fromname = old_name->name,
344 .fromlen = old_name->len,
345 .tofh = NFS_FH(new_dir),
346 .toname = new_name->name,
347 .tolen = new_name->len
348 };
349 int status;
350
351 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
352 status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
353 dprintk("NFS reply rename: %d\n", status);
354 return status;
355}
356
357static int
358nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
359{
360 struct nfs_linkargs arg = {
361 .fromfh = NFS_FH(inode),
362 .tofh = NFS_FH(dir),
363 .toname = name->name,
364 .tolen = name->len
365 };
366 int status;
367
368 dprintk("NFS call link %s\n", name->name);
369 status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
370 dprintk("NFS reply link: %d\n", status);
371 return status;
372}
373
374static int
375nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
376 struct iattr *sattr, struct nfs_fh *fhandle,
377 struct nfs_fattr *fattr)
378{
379 struct nfs_symlinkargs arg = {
380 .fromfh = NFS_FH(dir),
381 .fromname = name->name,
382 .fromlen = name->len,
383 .topath = path->name,
384 .tolen = path->len,
385 .sattr = sattr
386 };
387 int status;
388
389 if (path->len > NFS2_MAXPATHLEN)
390 return -ENAMETOOLONG;
391 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
392 fattr->valid = 0;
393 fhandle->size = 0;
394 status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
395 dprintk("NFS reply symlink: %d\n", status);
396 return status;
397}
398
399static int
400nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
401{
402 struct nfs_fh fhandle;
403 struct nfs_fattr fattr;
404 struct nfs_createargs arg = {
405 .fh = NFS_FH(dir),
406 .name = dentry->d_name.name,
407 .len = dentry->d_name.len,
408 .sattr = sattr
409 };
410 struct nfs_diropok res = {
411 .fh = &fhandle,
412 .fattr = &fattr
413 };
414 int status;
415
416 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
417 fattr.valid = 0;
418 status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
419 if (status == 0)
420 status = nfs_instantiate(dentry, &fhandle, &fattr);
421 dprintk("NFS reply mkdir: %d\n", status);
422 return status;
423}
424
425static int
426nfs_proc_rmdir(struct inode *dir, struct qstr *name)
427{
428 struct nfs_diropargs arg = {
429 .fh = NFS_FH(dir),
430 .name = name->name,
431 .len = name->len
432 };
433 int status;
434
435 dprintk("NFS call rmdir %s\n", name->name);
436 status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
437 dprintk("NFS reply rmdir: %d\n", status);
438 return status;
439}
440
441/*
442 * The READDIR implementation is somewhat hackish - we pass a temporary
443 * buffer to the encode function, which installs it in the receive
444 * the receive iovec. The decode function just parses the reply to make
445 * sure it is syntactically correct; the entries itself are decoded
446 * from nfs_readdir by calling the decode_entry function directly.
447 */
448static int
449nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
450 u64 cookie, struct page *page, unsigned int count, int plus)
451{
452 struct inode *dir = dentry->d_inode;
453 struct nfs_readdirargs arg = {
454 .fh = NFS_FH(dir),
455 .cookie = cookie,
456 .count = count,
457 .pages = &page
458 };
459 struct rpc_message msg = {
460 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
461 .rpc_argp = &arg,
462 .rpc_resp = NULL,
463 .rpc_cred = cred
464 };
465 int status;
466
467 lock_kernel();
468
469 dprintk("NFS call readdir %d\n", (unsigned int)cookie);
470 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
471
472 dprintk("NFS reply readdir: %d\n", status);
473 unlock_kernel();
474 return status;
475}
476
477static int
478nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
479 struct nfs_fsstat *stat)
480{
481 struct nfs2_fsstat fsinfo;
482 int status;
483
484 dprintk("NFS call statfs\n");
485 stat->fattr->valid = 0;
486 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
487 dprintk("NFS reply statfs: %d\n", status);
488 if (status)
489 goto out;
490 stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
491 stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize;
492 stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
493 stat->tfiles = 0;
494 stat->ffiles = 0;
495 stat->afiles = 0;
496out:
497 return status;
498}
499
500static int
501nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
502 struct nfs_fsinfo *info)
503{
504 struct nfs2_fsstat fsinfo;
505 int status;
506
507 dprintk("NFS call fsinfo\n");
508 info->fattr->valid = 0;
509 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
510 dprintk("NFS reply fsinfo: %d\n", status);
511 if (status)
512 goto out;
513 info->rtmax = NFS_MAXDATA;
514 info->rtpref = fsinfo.tsize;
515 info->rtmult = fsinfo.bsize;
516 info->wtmax = NFS_MAXDATA;
517 info->wtpref = fsinfo.tsize;
518 info->wtmult = fsinfo.bsize;
519 info->dtpref = fsinfo.tsize;
520 info->maxfilesize = 0x7FFFFFFF;
521 info->lease_time = 0;
522out:
523 return status;
524}
525
526static int
527nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
528 struct nfs_pathconf *info)
529{
530 info->max_link = 0;
531 info->max_namelen = NFS2_MAXNAMLEN;
532 return 0;
533}
534
535extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
536
537static void
538nfs_read_done(struct rpc_task *task)
539{
540 struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
541
542 if (task->tk_status >= 0) {
543 nfs_refresh_inode(data->inode, data->res.fattr);
544 /* Emulate the eof flag, which isn't normally needed in NFSv2
545 * as it is guaranteed to always return the file attributes
546 */
547 if (data->args.offset + data->args.count >= data->res.fattr->size)
548 data->res.eof = 1;
549 }
550 nfs_readpage_result(task);
551}
552
553static void
554nfs_proc_read_setup(struct nfs_read_data *data)
555{
556 struct rpc_task *task = &data->task;
557 struct inode *inode = data->inode;
558 int flags;
559 struct rpc_message msg = {
560 .rpc_proc = &nfs_procedures[NFSPROC_READ],
561 .rpc_argp = &data->args,
562 .rpc_resp = &data->res,
563 .rpc_cred = data->cred,
564 };
565
566 /* N.B. Do we need to test? Never called for swapfile inode */
567 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
568
569 /* Finalize the task. */
570 rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
571 rpc_call_setup(task, &msg, 0);
572}
573
574static void
575nfs_write_done(struct rpc_task *task)
576{
577 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
578
579 if (task->tk_status >= 0)
580 nfs_refresh_inode(data->inode, data->res.fattr);
581 nfs_writeback_done(task);
582}
583
584static void
585nfs_proc_write_setup(struct nfs_write_data *data, int how)
586{
587 struct rpc_task *task = &data->task;
588 struct inode *inode = data->inode;
589 int flags;
590 struct rpc_message msg = {
591 .rpc_proc = &nfs_procedures[NFSPROC_WRITE],
592 .rpc_argp = &data->args,
593 .rpc_resp = &data->res,
594 .rpc_cred = data->cred,
595 };
596
597 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
598 data->args.stable = NFS_FILE_SYNC;
599
600 /* Set the initial flags for the task. */
601 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
602
603 /* Finalize the task. */
604 rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
605 rpc_call_setup(task, &msg, 0);
606}
607
/*
 * Commit setup hook for this protocol version: reaching it is treated
 * as a kernel bug.
 */
static void nfs_proc_commit_setup(struct nfs_write_data *data, int how)
{
	BUG();
}
613
614static int
615nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
616{
617 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
618}
619
620
621struct nfs_rpc_ops nfs_v2_clientops = {
622 .version = 2, /* protocol version */
623 .dentry_ops = &nfs_dentry_operations,
624 .dir_inode_ops = &nfs_dir_inode_operations,
625 .getroot = nfs_proc_get_root,
626 .getattr = nfs_proc_getattr,
627 .setattr = nfs_proc_setattr,
628 .lookup = nfs_proc_lookup,
629 .access = NULL, /* access */
630 .readlink = nfs_proc_readlink,
631 .read = nfs_proc_read,
632 .write = nfs_proc_write,
633 .commit = NULL, /* commit */
634 .create = nfs_proc_create,
635 .remove = nfs_proc_remove,
636 .unlink_setup = nfs_proc_unlink_setup,
637 .unlink_done = nfs_proc_unlink_done,
638 .rename = nfs_proc_rename,
639 .link = nfs_proc_link,
640 .symlink = nfs_proc_symlink,
641 .mkdir = nfs_proc_mkdir,
642 .rmdir = nfs_proc_rmdir,
643 .readdir = nfs_proc_readdir,
644 .mknod = nfs_proc_mknod,
645 .statfs = nfs_proc_statfs,
646 .fsinfo = nfs_proc_fsinfo,
647 .pathconf = nfs_proc_pathconf,
648 .decode_dirent = nfs_decode_dirent,
649 .read_setup = nfs_proc_read_setup,
650 .write_setup = nfs_proc_write_setup,
651 .commit_setup = nfs_proc_commit_setup,
652 .file_open = nfs_open,
653 .file_release = nfs_release,
654 .lock = nfs_proc_lock,
655};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
new file mode 100644
index 000000000000..a0042fb58634
--- /dev/null
+++ b/fs/nfs/read.c
@@ -0,0 +1,618 @@
1/*
2 * linux/fs/nfs/read.c
3 *
4 * Block I/O for NFS
5 *
6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
8 *
9 * We do an ugly hack here in order to return proper error codes to the
10 * user program when a read request failed: since generic_file_read
11 * only checks the return value of inode->i_op->readpage() which is always 0
12 * for async RPC, we set the error bit of the page to 1 when an error occurs,
13 * and make nfs_readpage transmit requests synchronously when encountering this.
14 * This is only a small problem, though, since we now retry all operations
15 * within the RPC code when root squashing is suspected.
16 */
17
18#include <linux/config.h>
19#include <linux/time.h>
20#include <linux/kernel.h>
21#include <linux/errno.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/mm.h>
25#include <linux/slab.h>
26#include <linux/pagemap.h>
27#include <linux/sunrpc/clnt.h>
28#include <linux/nfs_fs.h>
29#include <linux/nfs_page.h>
30#include <linux/smp_lock.h>
31
32#include <asm/system.h>
33
34#define NFSDBG_FACILITY NFSDBG_PAGECACHE
35
36static int nfs_pagein_one(struct list_head *, struct inode *);
37static void nfs_readpage_result_partial(struct nfs_read_data *, int);
38static void nfs_readpage_result_full(struct nfs_read_data *, int);
39
40static kmem_cache_t *nfs_rdata_cachep;
41mempool_t *nfs_rdata_mempool;
42
43#define MIN_POOL_READ (32)
44
45void nfs_readdata_release(struct rpc_task *task)
46{
47 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
48 nfs_readdata_free(data);
49}
50
51static
52unsigned int nfs_page_length(struct inode *inode, struct page *page)
53{
54 loff_t i_size = i_size_read(inode);
55 unsigned long idx;
56
57 if (i_size <= 0)
58 return 0;
59 idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
60 if (page->index > idx)
61 return 0;
62 if (page->index != idx)
63 return PAGE_CACHE_SIZE;
64 return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
65}
66
67static
68int nfs_return_empty_page(struct page *page)
69{
70 memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
71 SetPageUptodate(page);
72 unlock_page(page);
73 return 0;
74}
75
76/*
77 * Read a page synchronously.
78 */
79static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
80 struct page *page)
81{
82 unsigned int rsize = NFS_SERVER(inode)->rsize;
83 unsigned int count = PAGE_CACHE_SIZE;
84 int result;
85 struct nfs_read_data *rdata;
86
87 rdata = nfs_readdata_alloc();
88 if (!rdata)
89 return -ENOMEM;
90
91 memset(rdata, 0, sizeof(*rdata));
92 rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
93 rdata->cred = ctx->cred;
94 rdata->inode = inode;
95 INIT_LIST_HEAD(&rdata->pages);
96 rdata->args.fh = NFS_FH(inode);
97 rdata->args.context = ctx;
98 rdata->args.pages = &page;
99 rdata->args.pgbase = 0UL;
100 rdata->args.count = rsize;
101 rdata->res.fattr = &rdata->fattr;
102
103 dprintk("NFS: nfs_readpage_sync(%p)\n", page);
104
105 /*
106 * This works now because the socket layer never tries to DMA
107 * into this buffer directly.
108 */
109 do {
110 if (count < rsize)
111 rdata->args.count = count;
112 rdata->res.count = rdata->args.count;
113 rdata->args.offset = page_offset(page) + rdata->args.pgbase;
114
115 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
116 NFS_SERVER(inode)->hostname,
117 inode->i_sb->s_id,
118 (long long)NFS_FILEID(inode),
119 (unsigned long long)rdata->args.pgbase,
120 rdata->args.count);
121
122 lock_kernel();
123 result = NFS_PROTO(inode)->read(rdata);
124 unlock_kernel();
125
126 /*
127 * Even if we had a partial success we can't mark the page
128 * cache valid.
129 */
130 if (result < 0) {
131 if (result == -EISDIR)
132 result = -EINVAL;
133 goto io_error;
134 }
135 count -= result;
136 rdata->args.pgbase += result;
137 /* Note: result == 0 should only happen if we're caching
138 * a write that extends the file and punches a hole.
139 */
140 if (rdata->res.eof != 0 || result == 0)
141 break;
142 } while (count);
143 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
144
145 if (count)
146 memclear_highpage_flush(page, rdata->args.pgbase, count);
147 SetPageUptodate(page);
148 if (PageError(page))
149 ClearPageError(page);
150 result = 0;
151
152io_error:
153 unlock_page(page);
154 nfs_readdata_free(rdata);
155 return result;
156}
157
158static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
159 struct page *page)
160{
161 LIST_HEAD(one_request);
162 struct nfs_page *new;
163 unsigned int len;
164
165 len = nfs_page_length(inode, page);
166 if (len == 0)
167 return nfs_return_empty_page(page);
168 new = nfs_create_request(ctx, inode, page, 0, len);
169 if (IS_ERR(new)) {
170 unlock_page(page);
171 return PTR_ERR(new);
172 }
173 if (len < PAGE_CACHE_SIZE)
174 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
175
176 nfs_lock_request(new);
177 nfs_list_add_request(new, &one_request);
178 nfs_pagein_one(&one_request, inode);
179 return 0;
180}
181
182static void nfs_readpage_release(struct nfs_page *req)
183{
184 unlock_page(req->wb_page);
185
186 nfs_clear_request(req);
187 nfs_release_request(req);
188 nfs_unlock_request(req);
189
190 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
191 req->wb_context->dentry->d_inode->i_sb->s_id,
192 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
193 req->wb_bytes,
194 (long long)req_offset(req));
195}
196
197/*
198 * Set up the NFS read request struct
199 */
200static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
201 unsigned int count, unsigned int offset)
202{
203 struct inode *inode;
204
205 data->req = req;
206 data->inode = inode = req->wb_context->dentry->d_inode;
207 data->cred = req->wb_context->cred;
208
209 data->args.fh = NFS_FH(inode);
210 data->args.offset = req_offset(req) + offset;
211 data->args.pgbase = req->wb_pgbase + offset;
212 data->args.pages = data->pagevec;
213 data->args.count = count;
214 data->args.context = req->wb_context;
215
216 data->res.fattr = &data->fattr;
217 data->res.count = count;
218 data->res.eof = 0;
219
220 NFS_PROTO(inode)->read_setup(data);
221
222 data->task.tk_cookie = (unsigned long)inode;
223 data->task.tk_calldata = data;
224 /* Release requests */
225 data->task.tk_release = nfs_readdata_release;
226
227 dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
228 data->task.tk_pid,
229 inode->i_sb->s_id,
230 (long long)NFS_FILEID(inode),
231 count,
232 (unsigned long long)data->args.offset);
233}
234
235static void
236nfs_async_read_error(struct list_head *head)
237{
238 struct nfs_page *req;
239
240 while (!list_empty(head)) {
241 req = nfs_list_entry(head->next);
242 nfs_list_remove_request(req);
243 SetPageError(req->wb_page);
244 nfs_readpage_release(req);
245 }
246}
247
248/*
249 * Start an async read operation
250 */
251static void nfs_execute_read(struct nfs_read_data *data)
252{
253 struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
254 sigset_t oldset;
255
256 rpc_clnt_sigmask(clnt, &oldset);
257 lock_kernel();
258 rpc_execute(&data->task);
259 unlock_kernel();
260 rpc_clnt_sigunmask(clnt, &oldset);
261}
262
263/*
264 * Generate multiple requests to fill a single page.
265 *
266 * We optimize to reduce the number of read operations on the wire. If we
267 * detect that we're reading a page, or an area of a page, that is past the
268 * end of file, we do not generate NFS read operations but just clear the
269 * parts of the page that would have come back zero from the server anyway.
270 *
271 * We rely on the cached value of i_size to make this determination; another
272 * client can fill pages on the server past our cached end-of-file, but we
273 * won't see the new data until our attribute cache is updated. This is more
274 * or less conventional NFS client behavior.
275 */
276static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
277{
278 struct nfs_page *req = nfs_list_entry(head->next);
279 struct page *page = req->wb_page;
280 struct nfs_read_data *data;
281 unsigned int rsize = NFS_SERVER(inode)->rsize;
282 unsigned int nbytes, offset;
283 int requests = 0;
284 LIST_HEAD(list);
285
286 nfs_list_remove_request(req);
287
288 nbytes = req->wb_bytes;
289 for(;;) {
290 data = nfs_readdata_alloc();
291 if (!data)
292 goto out_bad;
293 INIT_LIST_HEAD(&data->pages);
294 list_add(&data->pages, &list);
295 requests++;
296 if (nbytes <= rsize)
297 break;
298 nbytes -= rsize;
299 }
300 atomic_set(&req->wb_complete, requests);
301
302 ClearPageError(page);
303 offset = 0;
304 nbytes = req->wb_bytes;
305 do {
306 data = list_entry(list.next, struct nfs_read_data, pages);
307 list_del_init(&data->pages);
308
309 data->pagevec[0] = page;
310 data->complete = nfs_readpage_result_partial;
311
312 if (nbytes > rsize) {
313 nfs_read_rpcsetup(req, data, rsize, offset);
314 offset += rsize;
315 nbytes -= rsize;
316 } else {
317 nfs_read_rpcsetup(req, data, nbytes, offset);
318 nbytes = 0;
319 }
320 nfs_execute_read(data);
321 } while (nbytes != 0);
322
323 return 0;
324
325out_bad:
326 while (!list_empty(&list)) {
327 data = list_entry(list.next, struct nfs_read_data, pages);
328 list_del(&data->pages);
329 nfs_readdata_free(data);
330 }
331 SetPageError(page);
332 nfs_readpage_release(req);
333 return -ENOMEM;
334}
335
336static int nfs_pagein_one(struct list_head *head, struct inode *inode)
337{
338 struct nfs_page *req;
339 struct page **pages;
340 struct nfs_read_data *data;
341 unsigned int count;
342
343 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
344 return nfs_pagein_multi(head, inode);
345
346 data = nfs_readdata_alloc();
347 if (!data)
348 goto out_bad;
349
350 INIT_LIST_HEAD(&data->pages);
351 pages = data->pagevec;
352 count = 0;
353 while (!list_empty(head)) {
354 req = nfs_list_entry(head->next);
355 nfs_list_remove_request(req);
356 nfs_list_add_request(req, &data->pages);
357 ClearPageError(req->wb_page);
358 *pages++ = req->wb_page;
359 count += req->wb_bytes;
360 }
361 req = nfs_list_entry(data->pages.next);
362
363 data->complete = nfs_readpage_result_full;
364 nfs_read_rpcsetup(req, data, count, 0);
365
366 nfs_execute_read(data);
367 return 0;
368out_bad:
369 nfs_async_read_error(head);
370 return -ENOMEM;
371}
372
373static int
374nfs_pagein_list(struct list_head *head, int rpages)
375{
376 LIST_HEAD(one_request);
377 struct nfs_page *req;
378 int error = 0;
379 unsigned int pages = 0;
380
381 while (!list_empty(head)) {
382 pages += nfs_coalesce_requests(head, &one_request, rpages);
383 req = nfs_list_entry(one_request.next);
384 error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
385 if (error < 0)
386 break;
387 }
388 if (error >= 0)
389 return pages;
390
391 nfs_async_read_error(head);
392 return error;
393}
394
395/*
396 * Handle a read reply that fills part of a page.
397 */
398static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
399{
400 struct nfs_page *req = data->req;
401 struct page *page = req->wb_page;
402
403 if (status >= 0) {
404 unsigned int request = data->args.count;
405 unsigned int result = data->res.count;
406
407 if (result < request) {
408 memclear_highpage_flush(page,
409 data->args.pgbase + result,
410 request - result);
411 }
412 } else
413 SetPageError(page);
414
415 if (atomic_dec_and_test(&req->wb_complete)) {
416 if (!PageError(page))
417 SetPageUptodate(page);
418 nfs_readpage_release(req);
419 }
420}
421
422/*
423 * This is the callback from RPC telling us whether a reply was
424 * received or some error occurred (timeout or socket shutdown).
425 */
426static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
427{
428 unsigned int count = data->res.count;
429
430 while (!list_empty(&data->pages)) {
431 struct nfs_page *req = nfs_list_entry(data->pages.next);
432 struct page *page = req->wb_page;
433 nfs_list_remove_request(req);
434
435 if (status >= 0) {
436 if (count < PAGE_CACHE_SIZE) {
437 if (count < req->wb_bytes)
438 memclear_highpage_flush(page,
439 req->wb_pgbase + count,
440 req->wb_bytes - count);
441 count = 0;
442 } else
443 count -= PAGE_CACHE_SIZE;
444 SetPageUptodate(page);
445 } else
446 SetPageError(page);
447 nfs_readpage_release(req);
448 }
449}
450
451/*
452 * This is the callback from RPC telling us whether a reply was
453 * received or some error occurred (timeout or socket shutdown).
454 */
455void nfs_readpage_result(struct rpc_task *task)
456{
457 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
458 struct nfs_readargs *argp = &data->args;
459 struct nfs_readres *resp = &data->res;
460 int status = task->tk_status;
461
462 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
463 task->tk_pid, status);
464
465 /* Is this a short read? */
466 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
467 /* Has the server at least made some progress? */
468 if (resp->count != 0) {
469 /* Yes, so retry the read at the end of the data */
470 argp->offset += resp->count;
471 argp->pgbase += resp->count;
472 argp->count -= resp->count;
473 rpc_restart_call(task);
474 return;
475 }
476 task->tk_status = -EIO;
477 }
478 NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
479 data->complete(data, status);
480}
481
482/*
483 * Read a page over NFS.
484 * We read the page synchronously in the following case:
485 * - The error flag is set for this page. This happens only when a
486 * previous async read operation failed.
487 */
488int nfs_readpage(struct file *file, struct page *page)
489{
490 struct nfs_open_context *ctx;
491 struct inode *inode = page->mapping->host;
492 int error;
493
494 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
495 page, PAGE_CACHE_SIZE, page->index);
496 /*
497 * Try to flush any pending writes to the file..
498 *
499 * NOTE! Because we own the page lock, there cannot
500 * be any new pending writes generated at this point
501 * for this page (other pages can be written to).
502 */
503 error = nfs_wb_page(inode, page);
504 if (error)
505 goto out_error;
506
507 if (file == NULL) {
508 ctx = nfs_find_open_context(inode, FMODE_READ);
509 if (ctx == NULL)
510 return -EBADF;
511 } else
512 ctx = get_nfs_open_context((struct nfs_open_context *)
513 file->private_data);
514 if (!IS_SYNC(inode)) {
515 error = nfs_readpage_async(ctx, inode, page);
516 goto out;
517 }
518
519 error = nfs_readpage_sync(ctx, inode, page);
520 if (error < 0 && IS_SWAPFILE(inode))
521 printk("Aiee.. nfs swap-in of page failed!\n");
522out:
523 put_nfs_open_context(ctx);
524 return error;
525
526out_error:
527 unlock_page(page);
528 return error;
529}
530
/* State passed from nfs_readpages() to readpage_async_filler(). */
struct nfs_readdesc {
	struct list_head *head;		/* list the new read requests are queued on */
	struct nfs_open_context *ctx;	/* open context used to create the requests */
};
535
536static int
537readpage_async_filler(void *data, struct page *page)
538{
539 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
540 struct inode *inode = page->mapping->host;
541 struct nfs_page *new;
542 unsigned int len;
543
544 nfs_wb_page(inode, page);
545 len = nfs_page_length(inode, page);
546 if (len == 0)
547 return nfs_return_empty_page(page);
548 new = nfs_create_request(desc->ctx, inode, page, 0, len);
549 if (IS_ERR(new)) {
550 SetPageError(page);
551 unlock_page(page);
552 return PTR_ERR(new);
553 }
554 if (len < PAGE_CACHE_SIZE)
555 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
556 nfs_lock_request(new);
557 nfs_list_add_request(new, desc->head);
558 return 0;
559}
560
561int nfs_readpages(struct file *filp, struct address_space *mapping,
562 struct list_head *pages, unsigned nr_pages)
563{
564 LIST_HEAD(head);
565 struct nfs_readdesc desc = {
566 .head = &head,
567 };
568 struct inode *inode = mapping->host;
569 struct nfs_server *server = NFS_SERVER(inode);
570 int ret;
571
572 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
573 inode->i_sb->s_id,
574 (long long)NFS_FILEID(inode),
575 nr_pages);
576
577 if (filp == NULL) {
578 desc.ctx = nfs_find_open_context(inode, FMODE_READ);
579 if (desc.ctx == NULL)
580 return -EBADF;
581 } else
582 desc.ctx = get_nfs_open_context((struct nfs_open_context *)
583 filp->private_data);
584 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
585 if (!list_empty(&head)) {
586 int err = nfs_pagein_list(&head, server->rpages);
587 if (!ret)
588 ret = err;
589 }
590 put_nfs_open_context(desc.ctx);
591 return ret;
592}
593
594int nfs_init_readpagecache(void)
595{
596 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
597 sizeof(struct nfs_read_data),
598 0, SLAB_HWCACHE_ALIGN,
599 NULL, NULL);
600 if (nfs_rdata_cachep == NULL)
601 return -ENOMEM;
602
603 nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
604 mempool_alloc_slab,
605 mempool_free_slab,
606 nfs_rdata_cachep);
607 if (nfs_rdata_mempool == NULL)
608 return -ENOMEM;
609
610 return 0;
611}
612
613void nfs_destroy_readpagecache(void)
614{
615 mempool_destroy(nfs_rdata_mempool);
616 if (kmem_cache_destroy(nfs_rdata_cachep))
617 printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
618}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
new file mode 100644
index 000000000000..35f106599144
--- /dev/null
+++ b/fs/nfs/symlink.c
@@ -0,0 +1,117 @@
1/*
2 * linux/fs/nfs/symlink.c
3 *
4 * Copyright (C) 1992 Rick Sladkey
5 *
6 * Optimization changes Copyright (C) 1994 Florian La Roche
7 *
8 * Jun 7 1999, cache symlink lookups in the page cache. -DaveM
9 *
10 * nfs symlink handling code
11 */
12
13#define NFS_NEED_XDR_TYPES
14#include <linux/time.h>
15#include <linux/errno.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfs.h>
18#include <linux/nfs2.h>
19#include <linux/nfs_fs.h>
20#include <linux/pagemap.h>
21#include <linux/stat.h>
22#include <linux/mm.h>
23#include <linux/slab.h>
24#include <linux/string.h>
25#include <linux/smp_lock.h>
26#include <linux/namei.h>
27
28/* Symlink caching in the page cache is even more simplistic
29 * and straight-forward than readdir caching.
30 *
31 * At the beginning of the page we store pointer to struct page in question,
32 * simplifying nfs_put_link() (if inode got invalidated we can't find the page
33 * to be freed via pagecache lookup).
34 * The NUL-terminated string follows immediately thereafter.
35 */
36
/* Layout of a cached symlink page: back-pointer first, then the target. */
struct nfs_symlink {
	struct page *page;	/* the page this structure lives in */
	char body[0];		/* NUL-terminated link target */
};
41
42static int nfs_symlink_filler(struct inode *inode, struct page *page)
43{
44 const unsigned int pgbase = offsetof(struct nfs_symlink, body);
45 const unsigned int pglen = PAGE_SIZE - pgbase;
46 int error;
47
48 lock_kernel();
49 error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
50 unlock_kernel();
51 if (error < 0)
52 goto error;
53 SetPageUptodate(page);
54 unlock_page(page);
55 return 0;
56
57error:
58 SetPageError(page);
59 unlock_page(page);
60 return -EIO;
61}
62
63static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
64{
65 struct inode *inode = dentry->d_inode;
66 struct page *page;
67 struct nfs_symlink *p;
68 void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
69 if (err)
70 goto read_failed;
71 page = read_cache_page(&inode->i_data, 0,
72 (filler_t *)nfs_symlink_filler, inode);
73 if (IS_ERR(page)) {
74 err = page;
75 goto read_failed;
76 }
77 if (!PageUptodate(page)) {
78 err = ERR_PTR(-EIO);
79 goto getlink_read_error;
80 }
81 p = kmap(page);
82 p->page = page;
83 nd_set_link(nd, p->body);
84 return 0;
85
86getlink_read_error:
87 page_cache_release(page);
88read_failed:
89 nd_set_link(nd, err);
90 return 0;
91}
92
93static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
94{
95 char *s = nd_get_link(nd);
96 if (!IS_ERR(s)) {
97 struct nfs_symlink *p;
98 struct page *page;
99
100 p = container_of(s, struct nfs_symlink, body[0]);
101 page = p->page;
102
103 kunmap(page);
104 page_cache_release(page);
105 }
106}
107
108/*
109 * symlinks can't do much...
110 */
111struct inode_operations nfs_symlink_inode_operations = {
112 .readlink = generic_readlink,
113 .follow_link = nfs_follow_link,
114 .put_link = nfs_put_link,
115 .getattr = nfs_getattr,
116 .setattr = nfs_setattr,
117};
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
new file mode 100644
index 000000000000..f732541a3332
--- /dev/null
+++ b/fs/nfs/unlink.c
@@ -0,0 +1,227 @@
1/*
2 * linux/fs/nfs/unlink.c
3 *
4 * nfs sillydelete handling
5 *
6 * NOTE: we rely on holding the BKL for list manipulation protection.
7 */
8
9#include <linux/slab.h>
10#include <linux/string.h>
11#include <linux/dcache.h>
12#include <linux/sunrpc/sched.h>
13#include <linux/sunrpc/clnt.h>
14#include <linux/nfs_fs.h>
15
16
17struct nfs_unlinkdata {
18 struct nfs_unlinkdata *next;
19 struct dentry *dir, *dentry;
20 struct qstr name;
21 struct rpc_task task;
22 struct rpc_cred *cred;
23 unsigned int count;
24};
25
26static struct nfs_unlinkdata *nfs_deletes;
27static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue");
28
29/**
30 * nfs_detach_unlinkdata - Remove asynchronous unlink from global list
31 * @data: pointer to descriptor
32 */
33static inline void
34nfs_detach_unlinkdata(struct nfs_unlinkdata *data)
35{
36 struct nfs_unlinkdata **q;
37
38 for (q = &nfs_deletes; *q != NULL; q = &((*q)->next)) {
39 if (*q == data) {
40 *q = data->next;
41 break;
42 }
43 }
44}
45
46/**
47 * nfs_put_unlinkdata - release data from a sillydelete operation.
48 * @data: pointer to unlink structure.
49 */
50static void
51nfs_put_unlinkdata(struct nfs_unlinkdata *data)
52{
53 if (--data->count == 0) {
54 nfs_detach_unlinkdata(data);
55 if (data->name.name != NULL)
56 kfree(data->name.name);
57 kfree(data);
58 }
59}
60
/*
 * Allocation size for a name of @len bytes: the next multiple of 16 that
 * is strictly greater than @len.  The argument is parenthesised so that an
 * expression argument is evaluated as a whole.
 */
#define NAME_ALLOC_LEN(len)	(((len) + 16) & ~15)
62/**
63 * nfs_copy_dname - copy dentry name to data structure
64 * @dentry: pointer to dentry
65 * @data: nfs_unlinkdata
66 */
67static inline void
68nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
69{
70 char *str;
71 int len = dentry->d_name.len;
72
73 str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL);
74 if (!str)
75 return;
76 memcpy(str, dentry->d_name.name, len);
77 if (!data->name.len) {
78 data->name.len = len;
79 data->name.name = str;
80 } else
81 kfree(str);
82}
83
84/**
85 * nfs_async_unlink_init - Initialize the RPC info
86 * @task: rpc_task of the sillydelete
87 *
88 * We delay initializing RPC info until after the call to dentry_iput()
89 * in order to minimize races against rename().
90 */
91static void
92nfs_async_unlink_init(struct rpc_task *task)
93{
94 struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
95 struct dentry *dir = data->dir;
96 struct rpc_message msg = {
97 .rpc_cred = data->cred,
98 };
99 int status = -ENOENT;
100
101 if (!data->name.len)
102 goto out_err;
103
104 status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
105 if (status < 0)
106 goto out_err;
107 nfs_begin_data_update(dir->d_inode);
108 rpc_call_setup(task, &msg, 0);
109 return;
110 out_err:
111 rpc_exit(task, status);
112}
113
114/**
115 * nfs_async_unlink_done - Sillydelete post-processing
116 * @task: rpc_task of the sillydelete
117 *
118 * Do the directory attribute update.
119 */
120static void
121nfs_async_unlink_done(struct rpc_task *task)
122{
123 struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
124 struct dentry *dir = data->dir;
125 struct inode *dir_i;
126
127 if (!dir)
128 return;
129 dir_i = dir->d_inode;
130 nfs_end_data_update(dir_i);
131 if (NFS_PROTO(dir_i)->unlink_done(dir, task))
132 return;
133 put_rpccred(data->cred);
134 data->cred = NULL;
135 dput(dir);
136}
137
138/**
139 * nfs_async_unlink_release - Release the sillydelete data.
140 * @task: rpc_task of the sillydelete
141 *
142 * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
143 * rpc_task would be freed too.
144 */
145static void
146nfs_async_unlink_release(struct rpc_task *task)
147{
148 struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
149 nfs_put_unlinkdata(data);
150}
151
152/**
153 * nfs_async_unlink - asynchronous unlinking of a file
154 * @dentry: dentry to unlink
155 */
156int
157nfs_async_unlink(struct dentry *dentry)
158{
159 struct dentry *dir = dentry->d_parent;
160 struct nfs_unlinkdata *data;
161 struct rpc_task *task;
162 struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
163 int status = -ENOMEM;
164
165 data = kmalloc(sizeof(*data), GFP_KERNEL);
166 if (!data)
167 goto out;
168 memset(data, 0, sizeof(*data));
169
170 data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
171 if (IS_ERR(data->cred)) {
172 status = PTR_ERR(data->cred);
173 goto out_free;
174 }
175 data->dir = dget(dir);
176 data->dentry = dentry;
177
178 data->next = nfs_deletes;
179 nfs_deletes = data;
180 data->count = 1;
181
182 task = &data->task;
183 rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
184 task->tk_calldata = data;
185 task->tk_action = nfs_async_unlink_init;
186 task->tk_release = nfs_async_unlink_release;
187
188 spin_lock(&dentry->d_lock);
189 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
190 spin_unlock(&dentry->d_lock);
191
192 rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
193 status = 0;
194 out:
195 return status;
196out_free:
197 kfree(data);
198 return status;
199}
200
201/**
202 * nfs_complete_unlink - Initialize completion of the sillydelete
203 * @dentry: dentry to delete
204 *
205 * Since we're most likely to be called by dentry_iput(), we
206 * only use the dentry to find the sillydelete. We then copy the name
207 * into the qstr.
208 */
209void
210nfs_complete_unlink(struct dentry *dentry)
211{
212 struct nfs_unlinkdata *data;
213
214 for(data = nfs_deletes; data != NULL; data = data->next) {
215 if (dentry == data->dentry)
216 break;
217 }
218 if (!data)
219 return;
220 data->count++;
221 nfs_copy_dname(dentry, data);
222 spin_lock(&dentry->d_lock);
223 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
224 spin_unlock(&dentry->d_lock);
225 rpc_wake_up_task(&data->task);
226 nfs_put_unlinkdata(data);
227}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
new file mode 100644
index 000000000000..6f7a4af3bc46
--- /dev/null
+++ b/fs/nfs/write.c
@@ -0,0 +1,1431 @@
1/*
2 * linux/fs/nfs/write.c
3 *
4 * Writing file data over NFS.
5 *
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
12 *
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
14 *
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
20 *
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
24 *
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
27 * cases:
28 *
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
32 *
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
35 *
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
40 *
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
45 *
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
47 */
48
49#include <linux/config.h>
50#include <linux/types.h>
51#include <linux/slab.h>
52#include <linux/mm.h>
53#include <linux/pagemap.h>
54#include <linux/file.h>
55#include <linux/mpage.h>
56#include <linux/writeback.h>
57
58#include <linux/sunrpc/clnt.h>
59#include <linux/nfs_fs.h>
60#include <linux/nfs_mount.h>
61#include <linux/nfs_page.h>
62#include <asm/uaccess.h>
63#include <linux/smp_lock.h>
64
65#include "delegation.h"
66
67#define NFSDBG_FACILITY NFSDBG_PAGECACHE
68
69#define MIN_POOL_WRITE (32)
70#define MIN_POOL_COMMIT (4)
71
72/*
73 * Local function declarations
74 */
75static struct nfs_page * nfs_update_request(struct nfs_open_context*,
76 struct inode *,
77 struct page *,
78 unsigned int, unsigned int);
79static void nfs_writeback_done_partial(struct nfs_write_data *, int);
80static void nfs_writeback_done_full(struct nfs_write_data *, int);
81static int nfs_wait_on_write_congestion(struct address_space *, int);
82static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
83static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
84 unsigned int npages, int how);
85
86static kmem_cache_t *nfs_wdata_cachep;
87mempool_t *nfs_wdata_mempool;
88static mempool_t *nfs_commit_mempool;
89
90static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
91
92static inline struct nfs_write_data *nfs_commit_alloc(void)
93{
94 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
95 if (p) {
96 memset(p, 0, sizeof(*p));
97 INIT_LIST_HEAD(&p->pages);
98 }
99 return p;
100}
101
/* Return a structure obtained from nfs_commit_alloc() to the commit mempool. */
static inline void nfs_commit_free(struct nfs_write_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
106
107static void nfs_writedata_release(struct rpc_task *task)
108{
109 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
110 nfs_writedata_free(wdata);
111}
112
113/* Adjust the file length if we're writing beyond the end */
114static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
115{
116 struct inode *inode = page->mapping->host;
117 loff_t end, i_size = i_size_read(inode);
118 unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
119
120 if (i_size > 0 && page->index < end_index)
121 return;
122 end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
123 if (i_size >= end)
124 return;
125 i_size_write(inode, end);
126}
127
128/* We can set the PG_uptodate flag if we see that a write request
129 * covers the full page.
130 */
131static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
132{
133 loff_t end_offs;
134
135 if (PageUptodate(page))
136 return;
137 if (base != 0)
138 return;
139 if (count == PAGE_CACHE_SIZE) {
140 SetPageUptodate(page);
141 return;
142 }
143
144 end_offs = i_size_read(page->mapping->host) - 1;
145 if (end_offs < 0)
146 return;
147 /* Is this the last page? */
148 if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
149 return;
150 /* This is the last page: set PG_uptodate if we cover the entire
151 * extent of the data, then zero the rest of the page.
152 */
153 if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
154 memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
155 SetPageUptodate(page);
156 }
157}
158
159/*
160 * Write a page synchronously.
161 * Offset is the data offset within the page.
162 */
163static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
164 struct page *page, unsigned int offset, unsigned int count,
165 int how)
166{
167 unsigned int wsize = NFS_SERVER(inode)->wsize;
168 int result, written = 0;
169 struct nfs_write_data *wdata;
170
171 wdata = nfs_writedata_alloc();
172 if (!wdata)
173 return -ENOMEM;
174
175 wdata->flags = how;
176 wdata->cred = ctx->cred;
177 wdata->inode = inode;
178 wdata->args.fh = NFS_FH(inode);
179 wdata->args.context = ctx;
180 wdata->args.pages = &page;
181 wdata->args.stable = NFS_FILE_SYNC;
182 wdata->args.pgbase = offset;
183 wdata->args.count = wsize;
184 wdata->res.fattr = &wdata->fattr;
185 wdata->res.verf = &wdata->verf;
186
187 dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
188 inode->i_sb->s_id,
189 (long long)NFS_FILEID(inode),
190 count, (long long)(page_offset(page) + offset));
191
192 nfs_begin_data_update(inode);
193 do {
194 if (count < wsize)
195 wdata->args.count = count;
196 wdata->args.offset = page_offset(page) + wdata->args.pgbase;
197
198 result = NFS_PROTO(inode)->write(wdata);
199
200 if (result < 0) {
201 /* Must mark the page invalid after I/O error */
202 ClearPageUptodate(page);
203 goto io_error;
204 }
205 if (result < wdata->args.count)
206 printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
207 wdata->args.count, result);
208
209 wdata->args.offset += result;
210 wdata->args.pgbase += result;
211 written += result;
212 count -= result;
213 } while (count);
214 /* Update file length */
215 nfs_grow_file(page, offset, written);
216 /* Set the PG_uptodate flag? */
217 nfs_mark_uptodate(page, offset, written);
218
219 if (PageError(page))
220 ClearPageError(page);
221
222io_error:
223 nfs_end_data_update_defer(inode);
224 nfs_writedata_free(wdata);
225 return written ? written : result;
226}
227
/*
 * Queue an asynchronous write of @count bytes at @offset in @page by
 * creating or updating a write request.  Returns 0 on success or the
 * error encoded in the pointer returned by nfs_update_request().
 */
static int nfs_writepage_async(struct nfs_open_context *ctx,
		struct inode *inode, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req = nfs_update_request(ctx, inode, page, offset, count);

	if (IS_ERR(req))
		return PTR_ERR(req);

	/* Update file length */
	nfs_grow_file(page, offset, count);
	/* Set the PG_uptodate flag? */
	nfs_mark_uptodate(page, offset, count);
	nfs_unlock_request(req);
	return 0;
}
247
248static int wb_priority(struct writeback_control *wbc)
249{
250 if (wbc->for_reclaim)
251 return FLUSH_HIGHPRI;
252 if (wbc->for_kupdate)
253 return FLUSH_LOWPRI;
254 return 0;
255}
256
257/*
258 * Write an mmapped page to the server.
259 */
260int nfs_writepage(struct page *page, struct writeback_control *wbc)
261{
262 struct nfs_open_context *ctx;
263 struct inode *inode = page->mapping->host;
264 unsigned long end_index;
265 unsigned offset = PAGE_CACHE_SIZE;
266 loff_t i_size = i_size_read(inode);
267 int inode_referenced = 0;
268 int priority = wb_priority(wbc);
269 int err;
270
271 /*
272 * Note: We need to ensure that we have a reference to the inode
273 * if we are to do asynchronous writes. If not, waiting
274 * in nfs_wait_on_request() may deadlock with clear_inode().
275 *
276 * If igrab() fails here, then it is in any case safe to
277 * call nfs_wb_page(), since there will be no pending writes.
278 */
279 if (igrab(inode) != 0)
280 inode_referenced = 1;
281 end_index = i_size >> PAGE_CACHE_SHIFT;
282
283 /* Ensure we've flushed out any previous writes */
284 nfs_wb_page_priority(inode, page, priority);
285
286 /* easy case */
287 if (page->index < end_index)
288 goto do_it;
289 /* things got complicated... */
290 offset = i_size & (PAGE_CACHE_SIZE-1);
291
292 /* OK, are we completely out? */
293 err = 0; /* potential race with truncate - ignore */
294 if (page->index >= end_index+1 || !offset)
295 goto out;
296do_it:
297 ctx = nfs_find_open_context(inode, FMODE_WRITE);
298 if (ctx == NULL) {
299 err = -EBADF;
300 goto out;
301 }
302 lock_kernel();
303 if (!IS_SYNC(inode) && inode_referenced) {
304 err = nfs_writepage_async(ctx, inode, page, 0, offset);
305 if (err >= 0) {
306 err = 0;
307 if (wbc->for_reclaim)
308 nfs_flush_inode(inode, 0, 0, FLUSH_STABLE);
309 }
310 } else {
311 err = nfs_writepage_sync(ctx, inode, page, 0,
312 offset, priority);
313 if (err >= 0) {
314 if (err != offset)
315 redirty_page_for_writepage(wbc, page);
316 err = 0;
317 }
318 }
319 unlock_kernel();
320 put_nfs_open_context(ctx);
321out:
322 unlock_page(page);
323 if (inode_referenced)
324 iput(inode);
325 return err;
326}
327
328/*
329 * Note: causes nfs_update_request() to block on the assumption
330 * that the writeback is generated due to memory pressure.
331 */
332int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
333{
334 struct backing_dev_info *bdi = mapping->backing_dev_info;
335 struct inode *inode = mapping->host;
336 int err;
337
338 err = generic_writepages(mapping, wbc);
339 if (err)
340 return err;
341 while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
342 if (wbc->nonblocking)
343 return 0;
344 nfs_wait_on_write_congestion(mapping, 0);
345 }
346 err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
347 if (err < 0)
348 goto out;
349 wbc->nr_to_write -= err;
350 if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
351 err = nfs_wait_on_requests(inode, 0, 0);
352 if (err < 0)
353 goto out;
354 }
355 err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
356 if (err > 0) {
357 wbc->nr_to_write -= err;
358 err = 0;
359 }
360out:
361 clear_bit(BDI_write_congested, &bdi->state);
362 wake_up_all(&nfs_write_congestion);
363 return err;
364}
365
366/*
367 * Insert a write request into an inode
368 */
369static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
370{
371 struct nfs_inode *nfsi = NFS_I(inode);
372 int error;
373
374 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
375 BUG_ON(error == -EEXIST);
376 if (error)
377 return error;
378 if (!nfsi->npages) {
379 igrab(inode);
380 nfs_begin_data_update(inode);
381 if (nfs_have_delegation(inode, FMODE_WRITE))
382 nfsi->change_attr++;
383 }
384 nfsi->npages++;
385 atomic_inc(&req->wb_count);
386 return 0;
387}
388
389/*
390 * Insert a write request into an inode
391 */
392static void nfs_inode_remove_request(struct nfs_page *req)
393{
394 struct inode *inode = req->wb_context->dentry->d_inode;
395 struct nfs_inode *nfsi = NFS_I(inode);
396
397 BUG_ON (!NFS_WBACK_BUSY(req));
398
399 spin_lock(&nfsi->req_lock);
400 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
401 nfsi->npages--;
402 if (!nfsi->npages) {
403 spin_unlock(&nfsi->req_lock);
404 nfs_end_data_update_defer(inode);
405 iput(inode);
406 } else
407 spin_unlock(&nfsi->req_lock);
408 nfs_clear_request(req);
409 nfs_release_request(req);
410}
411
412/*
413 * Find a request
414 */
415static inline struct nfs_page *
416_nfs_find_request(struct inode *inode, unsigned long index)
417{
418 struct nfs_inode *nfsi = NFS_I(inode);
419 struct nfs_page *req;
420
421 req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
422 if (req)
423 atomic_inc(&req->wb_count);
424 return req;
425}
426
427static struct nfs_page *
428nfs_find_request(struct inode *inode, unsigned long index)
429{
430 struct nfs_page *req;
431 struct nfs_inode *nfsi = NFS_I(inode);
432
433 spin_lock(&nfsi->req_lock);
434 req = _nfs_find_request(inode, index);
435 spin_unlock(&nfsi->req_lock);
436 return req;
437}
438
439/*
440 * Add a request to the inode's dirty list.
441 */
442static void
443nfs_mark_request_dirty(struct nfs_page *req)
444{
445 struct inode *inode = req->wb_context->dentry->d_inode;
446 struct nfs_inode *nfsi = NFS_I(inode);
447
448 spin_lock(&nfsi->req_lock);
449 nfs_list_add_request(req, &nfsi->dirty);
450 nfsi->ndirty++;
451 spin_unlock(&nfsi->req_lock);
452 inc_page_state(nr_dirty);
453 mark_inode_dirty(inode);
454}
455
456/*
457 * Check if a request is dirty
458 */
459static inline int
460nfs_dirty_request(struct nfs_page *req)
461{
462 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
463 return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
464}
465
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Put a request on its inode's commit list, bump the commit and
 * unstable-page counters, and mark the inode dirty.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *owner = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(owner);

	spin_lock(&nfsi->req_lock);
	nfs_list_add_request(req, &nfsi->commit);
	nfsi->ncommit++;
	spin_unlock(&nfsi->req_lock);

	inc_page_state(nr_unstable);
	mark_inode_dirty(owner);
}
#endif
484
485/*
486 * Wait for a request to complete.
487 *
488 * Interruptible by signals only if mounted with intr flag.
489 */
490static int
491nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
492{
493 struct nfs_inode *nfsi = NFS_I(inode);
494 struct nfs_page *req;
495 unsigned long idx_end, next;
496 unsigned int res = 0;
497 int error;
498
499 if (npages == 0)
500 idx_end = ~0;
501 else
502 idx_end = idx_start + npages - 1;
503
504 spin_lock(&nfsi->req_lock);
505 next = idx_start;
506 while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
507 if (req->wb_index > idx_end)
508 break;
509
510 next = req->wb_index + 1;
511 if (!NFS_WBACK_BUSY(req))
512 continue;
513
514 atomic_inc(&req->wb_count);
515 spin_unlock(&nfsi->req_lock);
516 error = nfs_wait_on_request(req);
517 nfs_release_request(req);
518 if (error < 0)
519 return error;
520 spin_lock(&nfsi->req_lock);
521 res++;
522 }
523 spin_unlock(&nfsi->req_lock);
524 return res;
525}
526
527/*
528 * nfs_scan_dirty - Scan an inode for dirty requests
529 * @inode: NFS inode to scan
530 * @dst: destination list
531 * @idx_start: lower bound of page->index to scan.
532 * @npages: idx_start + npages sets the upper bound to scan.
533 *
534 * Moves requests from the inode's dirty page list.
535 * The requests are *not* checked to ensure that they form a contiguous set.
536 */
537static int
538nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
539{
540 struct nfs_inode *nfsi = NFS_I(inode);
541 int res;
542 res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
543 nfsi->ndirty -= res;
544 sub_page_state(nr_dirty,res);
545 if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
546 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
547 return res;
548}
549
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list onto @dst and
 * reduces ncommit by the number moved, which is returned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int moved = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);

	nfsi->ncommit -= moved;

	/* Sanity check: the counter and the list must agree on emptiness. */
	if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
	return moved;
}
#endif
573
574static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
575{
576 struct backing_dev_info *bdi = mapping->backing_dev_info;
577 DEFINE_WAIT(wait);
578 int ret = 0;
579
580 might_sleep();
581
582 if (!bdi_write_congested(bdi))
583 return 0;
584 if (intr) {
585 struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
586 sigset_t oldset;
587
588 rpc_clnt_sigmask(clnt, &oldset);
589 prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
590 if (bdi_write_congested(bdi)) {
591 if (signalled())
592 ret = -ERESTARTSYS;
593 else
594 schedule();
595 }
596 rpc_clnt_sigunmask(clnt, &oldset);
597 } else {
598 prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
599 if (bdi_write_congested(bdi))
600 schedule();
601 }
602 finish_wait(&nfs_write_congestion, &wait);
603 return ret;
604}
605
606
607/*
608 * Try to update any existing write request, or create one if there is none.
609 * In order to match, the request's credentials must match those of
610 * the calling process.
611 *
612 * Note: Should always be called with the Page Lock held!
613 */
614static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
615 struct inode *inode, struct page *page,
616 unsigned int offset, unsigned int bytes)
617{
618 struct nfs_server *server = NFS_SERVER(inode);
619 struct nfs_inode *nfsi = NFS_I(inode);
620 struct nfs_page *req, *new = NULL;
621 unsigned long rqend, end;
622
623 end = offset + bytes;
624
625 if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
626 return ERR_PTR(-ERESTARTSYS);
627 for (;;) {
628 /* Loop over all inode entries and see if we find
629 * A request for the page we wish to update
630 */
631 spin_lock(&nfsi->req_lock);
632 req = _nfs_find_request(inode, page->index);
633 if (req) {
634 if (!nfs_lock_request_dontget(req)) {
635 int error;
636 spin_unlock(&nfsi->req_lock);
637 error = nfs_wait_on_request(req);
638 nfs_release_request(req);
639 if (error < 0)
640 return ERR_PTR(error);
641 continue;
642 }
643 spin_unlock(&nfsi->req_lock);
644 if (new)
645 nfs_release_request(new);
646 break;
647 }
648
649 if (new) {
650 int error;
651 nfs_lock_request_dontget(new);
652 error = nfs_inode_add_request(inode, new);
653 if (error) {
654 spin_unlock(&nfsi->req_lock);
655 nfs_unlock_request(new);
656 return ERR_PTR(error);
657 }
658 spin_unlock(&nfsi->req_lock);
659 nfs_mark_request_dirty(new);
660 return new;
661 }
662 spin_unlock(&nfsi->req_lock);
663
664 new = nfs_create_request(ctx, inode, page, offset, bytes);
665 if (IS_ERR(new))
666 return new;
667 }
668
669 /* We have a request for our page.
670 * If the creds don't match, or the
671 * page addresses don't match,
672 * tell the caller to wait on the conflicting
673 * request.
674 */
675 rqend = req->wb_offset + req->wb_bytes;
676 if (req->wb_context != ctx
677 || req->wb_page != page
678 || !nfs_dirty_request(req)
679 || offset > rqend || end < req->wb_offset) {
680 nfs_unlock_request(req);
681 return ERR_PTR(-EBUSY);
682 }
683
684 /* Okay, the request matches. Update the region */
685 if (offset < req->wb_offset) {
686 req->wb_offset = offset;
687 req->wb_pgbase = offset;
688 req->wb_bytes = rqend - req->wb_offset;
689 }
690
691 if (end > rqend)
692 req->wb_bytes = end - req->wb_offset;
693
694 return req;
695}
696
697int nfs_flush_incompatible(struct file *file, struct page *page)
698{
699 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
700 struct inode *inode = page->mapping->host;
701 struct nfs_page *req;
702 int status = 0;
703 /*
704 * Look for a request corresponding to this page. If there
705 * is one, and it belongs to another file, we flush it out
706 * before we try to copy anything into the page. Do this
707 * due to the lack of an ACCESS-type call in NFSv2.
708 * Also do the same if we find a request from an existing
709 * dropped page.
710 */
711 req = nfs_find_request(inode, page->index);
712 if (req) {
713 if (req->wb_page != page || ctx != req->wb_context)
714 status = nfs_wb_page(inode, page);
715 nfs_release_request(req);
716 }
717 return (status < 0) ? status : 0;
718}
719
720/*
721 * Update and possibly write a cached page of an NFS file.
722 *
723 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
724 * things with a page scheduled for an RPC call (e.g. invalidate it).
725 */
726int nfs_updatepage(struct file *file, struct page *page,
727 unsigned int offset, unsigned int count)
728{
729 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
730 struct dentry *dentry = file->f_dentry;
731 struct inode *inode = page->mapping->host;
732 struct nfs_page *req;
733 int status = 0;
734
735 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
736 dentry->d_parent->d_name.name, dentry->d_name.name,
737 count, (long long)(page_offset(page) +offset));
738
739 if (IS_SYNC(inode)) {
740 status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
741 if (status > 0) {
742 if (offset == 0 && status == PAGE_CACHE_SIZE)
743 SetPageUptodate(page);
744 return 0;
745 }
746 return status;
747 }
748
749 /* If we're not using byte range locks, and we know the page
750 * is entirely in cache, it may be more efficient to avoid
751 * fragmenting write requests.
752 */
753 if (PageUptodate(page) && inode->i_flock == NULL) {
754 loff_t end_offs = i_size_read(inode) - 1;
755 unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
756
757 count += offset;
758 offset = 0;
759 if (unlikely(end_offs < 0)) {
760 /* Do nothing */
761 } else if (page->index == end_index) {
762 unsigned int pglen;
763 pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
764 if (count < pglen)
765 count = pglen;
766 } else if (page->index < end_index)
767 count = PAGE_CACHE_SIZE;
768 }
769
770 /*
771 * Try to find an NFS request corresponding to this page
772 * and update it.
773 * If the existing request cannot be updated, we must flush
774 * it out now.
775 */
776 do {
777 req = nfs_update_request(ctx, inode, page, offset, count);
778 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
779 if (status != -EBUSY)
780 break;
781 /* Request could not be updated. Flush it out and try again */
782 status = nfs_wb_page(inode, page);
783 } while (status >= 0);
784 if (status < 0)
785 goto done;
786
787 status = 0;
788
789 /* Update file length */
790 nfs_grow_file(page, offset, count);
791 /* Set the PG_uptodate flag? */
792 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
793 nfs_unlock_request(req);
794done:
795 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
796 status, (long long)i_size_read(inode));
797 if (status < 0)
798 ClearPageUptodate(page);
799 return status;
800}
801
802static void nfs_writepage_release(struct nfs_page *req)
803{
804 end_page_writeback(req->wb_page);
805
806#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
807 if (!PageError(req->wb_page)) {
808 if (NFS_NEED_RESCHED(req)) {
809 nfs_mark_request_dirty(req);
810 goto out;
811 } else if (NFS_NEED_COMMIT(req)) {
812 nfs_mark_request_commit(req);
813 goto out;
814 }
815 }
816 nfs_inode_remove_request(req);
817
818out:
819 nfs_clear_commit(req);
820 nfs_clear_reschedule(req);
821#else
822 nfs_inode_remove_request(req);
823#endif
824 nfs_unlock_request(req);
825}
826
827static inline int flush_task_priority(int how)
828{
829 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
830 case FLUSH_HIGHPRI:
831 return RPC_PRIORITY_HIGH;
832 case FLUSH_LOWPRI:
833 return RPC_PRIORITY_LOW;
834 }
835 return RPC_PRIORITY_NORMAL;
836}
837
838/*
839 * Set up the argument/result storage required for the RPC call.
840 */
841static void nfs_write_rpcsetup(struct nfs_page *req,
842 struct nfs_write_data *data,
843 unsigned int count, unsigned int offset,
844 int how)
845{
846 struct rpc_task *task = &data->task;
847 struct inode *inode;
848
849 /* Set up the RPC argument and reply structs
850 * NB: take care not to mess about with data->commit et al. */
851
852 data->req = req;
853 data->inode = inode = req->wb_context->dentry->d_inode;
854 data->cred = req->wb_context->cred;
855
856 data->args.fh = NFS_FH(inode);
857 data->args.offset = req_offset(req) + offset;
858 data->args.pgbase = req->wb_pgbase + offset;
859 data->args.pages = data->pagevec;
860 data->args.count = count;
861 data->args.context = req->wb_context;
862
863 data->res.fattr = &data->fattr;
864 data->res.count = count;
865 data->res.verf = &data->verf;
866
867 NFS_PROTO(inode)->write_setup(data, how);
868
869 data->task.tk_priority = flush_task_priority(how);
870 data->task.tk_cookie = (unsigned long)inode;
871 data->task.tk_calldata = data;
872 /* Release requests */
873 data->task.tk_release = nfs_writedata_release;
874
875 dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
876 task->tk_pid,
877 inode->i_sb->s_id,
878 (long long)NFS_FILEID(inode),
879 count,
880 (unsigned long long)data->args.offset);
881}
882
883static void nfs_execute_write(struct nfs_write_data *data)
884{
885 struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
886 sigset_t oldset;
887
888 rpc_clnt_sigmask(clnt, &oldset);
889 lock_kernel();
890 rpc_execute(&data->task);
891 unlock_kernel();
892 rpc_clnt_sigunmask(clnt, &oldset);
893}
894
895/*
896 * Generate multiple small requests to write out a single
897 * contiguous dirty area on one page.
898 */
899static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
900{
901 struct nfs_page *req = nfs_list_entry(head->next);
902 struct page *page = req->wb_page;
903 struct nfs_write_data *data;
904 unsigned int wsize = NFS_SERVER(inode)->wsize;
905 unsigned int nbytes, offset;
906 int requests = 0;
907 LIST_HEAD(list);
908
909 nfs_list_remove_request(req);
910
911 nbytes = req->wb_bytes;
912 for (;;) {
913 data = nfs_writedata_alloc();
914 if (!data)
915 goto out_bad;
916 list_add(&data->pages, &list);
917 requests++;
918 if (nbytes <= wsize)
919 break;
920 nbytes -= wsize;
921 }
922 atomic_set(&req->wb_complete, requests);
923
924 ClearPageError(page);
925 SetPageWriteback(page);
926 offset = 0;
927 nbytes = req->wb_bytes;
928 do {
929 data = list_entry(list.next, struct nfs_write_data, pages);
930 list_del_init(&data->pages);
931
932 data->pagevec[0] = page;
933 data->complete = nfs_writeback_done_partial;
934
935 if (nbytes > wsize) {
936 nfs_write_rpcsetup(req, data, wsize, offset, how);
937 offset += wsize;
938 nbytes -= wsize;
939 } else {
940 nfs_write_rpcsetup(req, data, nbytes, offset, how);
941 nbytes = 0;
942 }
943 nfs_execute_write(data);
944 } while (nbytes != 0);
945
946 return 0;
947
948out_bad:
949 while (!list_empty(&list)) {
950 data = list_entry(list.next, struct nfs_write_data, pages);
951 list_del(&data->pages);
952 nfs_writedata_free(data);
953 }
954 nfs_mark_request_dirty(req);
955 nfs_unlock_request(req);
956 return -ENOMEM;
957}
958
959/*
960 * Create an RPC task for the given write request and kick it.
961 * The page must have been locked by the caller.
962 *
963 * It may happen that the page we're passed is not marked dirty.
964 * This is the case if nfs_updatepage detects a conflicting request
965 * that has been written but not committed.
966 */
967static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
968{
969 struct nfs_page *req;
970 struct page **pages;
971 struct nfs_write_data *data;
972 unsigned int count;
973
974 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
975 return nfs_flush_multi(head, inode, how);
976
977 data = nfs_writedata_alloc();
978 if (!data)
979 goto out_bad;
980
981 pages = data->pagevec;
982 count = 0;
983 while (!list_empty(head)) {
984 req = nfs_list_entry(head->next);
985 nfs_list_remove_request(req);
986 nfs_list_add_request(req, &data->pages);
987 ClearPageError(req->wb_page);
988 SetPageWriteback(req->wb_page);
989 *pages++ = req->wb_page;
990 count += req->wb_bytes;
991 }
992 req = nfs_list_entry(data->pages.next);
993
994 data->complete = nfs_writeback_done_full;
995 /* Set up the argument struct */
996 nfs_write_rpcsetup(req, data, count, 0, how);
997
998 nfs_execute_write(data);
999 return 0;
1000 out_bad:
1001 while (!list_empty(head)) {
1002 struct nfs_page *req = nfs_list_entry(head->next);
1003 nfs_list_remove_request(req);
1004 nfs_mark_request_dirty(req);
1005 nfs_unlock_request(req);
1006 }
1007 return -ENOMEM;
1008}
1009
1010static int
1011nfs_flush_list(struct list_head *head, int wpages, int how)
1012{
1013 LIST_HEAD(one_request);
1014 struct nfs_page *req;
1015 int error = 0;
1016 unsigned int pages = 0;
1017
1018 while (!list_empty(head)) {
1019 pages += nfs_coalesce_requests(head, &one_request, wpages);
1020 req = nfs_list_entry(one_request.next);
1021 error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
1022 if (error < 0)
1023 break;
1024 }
1025 if (error >= 0)
1026 return pages;
1027
1028 while (!list_empty(head)) {
1029 req = nfs_list_entry(head->next);
1030 nfs_list_remove_request(req);
1031 nfs_mark_request_dirty(req);
1032 nfs_unlock_request(req);
1033 }
1034 return error;
1035}
1036
1037/*
1038 * Handle a write reply that flushed part of a page.
1039 */
1040static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
1041{
1042 struct nfs_page *req = data->req;
1043 struct page *page = req->wb_page;
1044
1045 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1046 req->wb_context->dentry->d_inode->i_sb->s_id,
1047 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1048 req->wb_bytes,
1049 (long long)req_offset(req));
1050
1051 if (status < 0) {
1052 ClearPageUptodate(page);
1053 SetPageError(page);
1054 req->wb_context->error = status;
1055 dprintk(", error = %d\n", status);
1056 } else {
1057#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1058 if (data->verf.committed < NFS_FILE_SYNC) {
1059 if (!NFS_NEED_COMMIT(req)) {
1060 nfs_defer_commit(req);
1061 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1062 dprintk(" defer commit\n");
1063 } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1064 nfs_defer_reschedule(req);
1065 dprintk(" server reboot detected\n");
1066 }
1067 } else
1068#endif
1069 dprintk(" OK\n");
1070 }
1071
1072 if (atomic_dec_and_test(&req->wb_complete))
1073 nfs_writepage_release(req);
1074}
1075
1076/*
1077 * Handle a write reply that flushes a whole page.
1078 *
1079 * FIXME: There is an inherent race with invalidate_inode_pages and
1080 * writebacks since the page->count is kept > 1 for as long
1081 * as the page has a write request pending.
1082 */
1083static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
1084{
1085 struct nfs_page *req;
1086 struct page *page;
1087
1088 /* Update attributes as result of writeback. */
1089 while (!list_empty(&data->pages)) {
1090 req = nfs_list_entry(data->pages.next);
1091 nfs_list_remove_request(req);
1092 page = req->wb_page;
1093
1094 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1095 req->wb_context->dentry->d_inode->i_sb->s_id,
1096 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1097 req->wb_bytes,
1098 (long long)req_offset(req));
1099
1100 if (status < 0) {
1101 ClearPageUptodate(page);
1102 SetPageError(page);
1103 req->wb_context->error = status;
1104 end_page_writeback(page);
1105 nfs_inode_remove_request(req);
1106 dprintk(", error = %d\n", status);
1107 goto next;
1108 }
1109 end_page_writeback(page);
1110
1111#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1112 if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
1113 nfs_inode_remove_request(req);
1114 dprintk(" OK\n");
1115 goto next;
1116 }
1117 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1118 nfs_mark_request_commit(req);
1119 dprintk(" marked for commit\n");
1120#else
1121 nfs_inode_remove_request(req);
1122#endif
1123 next:
1124 nfs_unlock_request(req);
1125 }
1126}
1127
1128/*
1129 * This function is called when the WRITE call is complete.
1130 */
1131void nfs_writeback_done(struct rpc_task *task)
1132{
1133 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
1134 struct nfs_writeargs *argp = &data->args;
1135 struct nfs_writeres *resp = &data->res;
1136
1137 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1138 task->tk_pid, task->tk_status);
1139
1140#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1141 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1142 /* We tried a write call, but the server did not
1143 * commit data to stable storage even though we
1144 * requested it.
1145 * Note: There is a known bug in Tru64 < 5.0 in which
1146 * the server reports NFS_DATA_SYNC, but performs
1147 * NFS_FILE_SYNC. We therefore implement this checking
1148 * as a dprintk() in order to avoid filling syslog.
1149 */
1150 static unsigned long complain;
1151
1152 if (time_before(complain, jiffies)) {
1153 dprintk("NFS: faulty NFS server %s:"
1154 " (committed = %d) != (stable = %d)\n",
1155 NFS_SERVER(data->inode)->hostname,
1156 resp->verf->committed, argp->stable);
1157 complain = jiffies + 300 * HZ;
1158 }
1159 }
1160#endif
1161 /* Is this a short write? */
1162 if (task->tk_status >= 0 && resp->count < argp->count) {
1163 static unsigned long complain;
1164
1165 /* Has the server at least made some progress? */
1166 if (resp->count != 0) {
1167 /* Was this an NFSv2 write or an NFSv3 stable write? */
1168 if (resp->verf->committed != NFS_UNSTABLE) {
1169 /* Resend from where the server left off */
1170 argp->offset += resp->count;
1171 argp->pgbase += resp->count;
1172 argp->count -= resp->count;
1173 } else {
1174 /* Resend as a stable write in order to avoid
1175 * headaches in the case of a server crash.
1176 */
1177 argp->stable = NFS_FILE_SYNC;
1178 }
1179 rpc_restart_call(task);
1180 return;
1181 }
1182 if (time_before(complain, jiffies)) {
1183 printk(KERN_WARNING
1184 "NFS: Server wrote zero bytes, expected %u.\n",
1185 argp->count);
1186 complain = jiffies + 300 * HZ;
1187 }
1188 /* Can't do anything about it except throw an error. */
1189 task->tk_status = -EIO;
1190 }
1191
1192 /*
1193 * Process the nfs_page list
1194 */
1195 data->complete(data, task->tk_status);
1196}
1197
1198
1199#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1200static void nfs_commit_release(struct rpc_task *task)
1201{
1202 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
1203 nfs_commit_free(wdata);
1204}
1205
1206/*
1207 * Set up the argument/result storage required for the RPC call.
1208 */
1209static void nfs_commit_rpcsetup(struct list_head *head,
1210 struct nfs_write_data *data, int how)
1211{
1212 struct rpc_task *task = &data->task;
1213 struct nfs_page *first, *last;
1214 struct inode *inode;
1215 loff_t start, end, len;
1216
1217 /* Set up the RPC argument and reply structs
1218 * NB: take care not to mess about with data->commit et al. */
1219
1220 list_splice_init(head, &data->pages);
1221 first = nfs_list_entry(data->pages.next);
1222 last = nfs_list_entry(data->pages.prev);
1223 inode = first->wb_context->dentry->d_inode;
1224
1225 /*
1226 * Determine the offset range of requests in the COMMIT call.
1227 * We rely on the fact that data->pages is an ordered list...
1228 */
1229 start = req_offset(first);
1230 end = req_offset(last) + last->wb_bytes;
1231 len = end - start;
1232 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1233 if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
1234 len = 0;
1235
1236 data->inode = inode;
1237 data->cred = first->wb_context->cred;
1238
1239 data->args.fh = NFS_FH(data->inode);
1240 data->args.offset = start;
1241 data->args.count = len;
1242 data->res.count = len;
1243 data->res.fattr = &data->fattr;
1244 data->res.verf = &data->verf;
1245
1246 NFS_PROTO(inode)->commit_setup(data, how);
1247
1248 data->task.tk_priority = flush_task_priority(how);
1249 data->task.tk_cookie = (unsigned long)inode;
1250 data->task.tk_calldata = data;
1251 /* Release requests */
1252 data->task.tk_release = nfs_commit_release;
1253
1254 dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
1255}
1256
1257/*
1258 * Commit dirty pages
1259 */
1260static int
1261nfs_commit_list(struct list_head *head, int how)
1262{
1263 struct nfs_write_data *data;
1264 struct nfs_page *req;
1265
1266 data = nfs_commit_alloc();
1267
1268 if (!data)
1269 goto out_bad;
1270
1271 /* Set up the argument struct */
1272 nfs_commit_rpcsetup(head, data, how);
1273
1274 nfs_execute_write(data);
1275 return 0;
1276 out_bad:
1277 while (!list_empty(head)) {
1278 req = nfs_list_entry(head->next);
1279 nfs_list_remove_request(req);
1280 nfs_mark_request_commit(req);
1281 nfs_unlock_request(req);
1282 }
1283 return -ENOMEM;
1284}
1285
1286/*
1287 * COMMIT call returned
1288 */
1289void
1290nfs_commit_done(struct rpc_task *task)
1291{
1292 struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
1293 struct nfs_page *req;
1294 int res = 0;
1295
1296 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1297 task->tk_pid, task->tk_status);
1298
1299 while (!list_empty(&data->pages)) {
1300 req = nfs_list_entry(data->pages.next);
1301 nfs_list_remove_request(req);
1302
1303 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1304 req->wb_context->dentry->d_inode->i_sb->s_id,
1305 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1306 req->wb_bytes,
1307 (long long)req_offset(req));
1308 if (task->tk_status < 0) {
1309 req->wb_context->error = task->tk_status;
1310 nfs_inode_remove_request(req);
1311 dprintk(", error = %d\n", task->tk_status);
1312 goto next;
1313 }
1314
1315 /* Okay, COMMIT succeeded, apparently. Check the verifier
1316 * returned by the server against all stored verfs. */
1317 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1318 /* We have a match */
1319 nfs_inode_remove_request(req);
1320 dprintk(" OK\n");
1321 goto next;
1322 }
1323 /* We have a mismatch. Write the page again */
1324 dprintk(" mismatch\n");
1325 nfs_mark_request_dirty(req);
1326 next:
1327 nfs_unlock_request(req);
1328 res++;
1329 }
1330 sub_page_state(nr_unstable,res);
1331}
1332#endif
1333
1334static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
1335 unsigned int npages, int how)
1336{
1337 struct nfs_inode *nfsi = NFS_I(inode);
1338 LIST_HEAD(head);
1339 int res,
1340 error = 0;
1341
1342 spin_lock(&nfsi->req_lock);
1343 res = nfs_scan_dirty(inode, &head, idx_start, npages);
1344 spin_unlock(&nfsi->req_lock);
1345 if (res)
1346 error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1347 if (error < 0)
1348 return error;
1349 return res;
1350}
1351
1352#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1353int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
1354 unsigned int npages, int how)
1355{
1356 struct nfs_inode *nfsi = NFS_I(inode);
1357 LIST_HEAD(head);
1358 int res,
1359 error = 0;
1360
1361 spin_lock(&nfsi->req_lock);
1362 res = nfs_scan_commit(inode, &head, idx_start, npages);
1363 if (res) {
1364 res += nfs_scan_commit(inode, &head, 0, 0);
1365 spin_unlock(&nfsi->req_lock);
1366 error = nfs_commit_list(&head, how);
1367 } else
1368 spin_unlock(&nfsi->req_lock);
1369 if (error < 0)
1370 return error;
1371 return res;
1372}
1373#endif
1374
1375int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
1376 unsigned int npages, int how)
1377{
1378 int error,
1379 wait;
1380
1381 wait = how & FLUSH_WAIT;
1382 how &= ~FLUSH_WAIT;
1383
1384 do {
1385 error = 0;
1386 if (wait)
1387 error = nfs_wait_on_requests(inode, idx_start, npages);
1388 if (error == 0)
1389 error = nfs_flush_inode(inode, idx_start, npages, how);
1390#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1391 if (error == 0)
1392 error = nfs_commit_inode(inode, idx_start, npages, how);
1393#endif
1394 } while (error > 0);
1395 return error;
1396}
1397
1398int nfs_init_writepagecache(void)
1399{
1400 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1401 sizeof(struct nfs_write_data),
1402 0, SLAB_HWCACHE_ALIGN,
1403 NULL, NULL);
1404 if (nfs_wdata_cachep == NULL)
1405 return -ENOMEM;
1406
1407 nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE,
1408 mempool_alloc_slab,
1409 mempool_free_slab,
1410 nfs_wdata_cachep);
1411 if (nfs_wdata_mempool == NULL)
1412 return -ENOMEM;
1413
1414 nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT,
1415 mempool_alloc_slab,
1416 mempool_free_slab,
1417 nfs_wdata_cachep);
1418 if (nfs_commit_mempool == NULL)
1419 return -ENOMEM;
1420
1421 return 0;
1422}
1423
1424void nfs_destroy_writepagecache(void)
1425{
1426 mempool_destroy(nfs_commit_mempool);
1427 mempool_destroy(nfs_wdata_mempool);
1428 if (kmem_cache_destroy(nfs_wdata_cachep))
1429 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
1430}
1431