aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/debugfs/inode.c63
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/lockd/host.c39
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/delegation.c186
-rw-r--r--fs/nfs/delegation.h26
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c73
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/mount_clnt.c169
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h40
-rw-r--r--fs/nfs/nfs4proc.c760
-rw-r--r--fs/nfs/nfs4state.c310
-rw-r--r--fs/nfs/nfs4xdr.c126
-rw-r--r--fs/nfs/nfsroot.c5
-rw-r--r--fs/nfs/pagelist.c60
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c1189
-rw-r--r--fs/nfs/write.c149
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4state.c1
-rw-r--r--fs/nfsd/vfs.c1
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/partitions/check.c1
-rw-r--r--fs/splice.c43
-rw-r--r--fs/sysfs/bin.c195
-rw-r--r--fs/sysfs/dir.c1297
-rw-r--r--fs/sysfs/file.c379
-rw-r--r--fs/sysfs/group.c55
-rw-r--r--fs/sysfs/inode.c221
-rw-r--r--fs/sysfs/mount.c36
-rw-r--r--fs/sysfs/symlink.c150
-rw-r--r--fs/sysfs/sysfs.h169
41 files changed, 3830 insertions, 2088 deletions
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed62162..898a86dde8f5 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
470 470
471 ret = -ENOENT; 471 ret = -ENOENT;
472 if (!IS_DEADDIR(host_inode)) { 472 if (!IS_DEADDIR(host_inode)) {
473 ret = host_file->f_op->readdir(host_file, filldir, dirent); 473 ret = host_file->f_op->readdir(host_file, dirent, filldir);
474 file_accessed(host_file); 474 file_accessed(host_file);
475 } 475 }
476 } 476 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264de..1d533a2ec3a6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
368} 368}
369EXPORT_SYMBOL_GPL(debugfs_remove); 369EXPORT_SYMBOL_GPL(debugfs_remove);
370 370
371/**
372 * debugfs_rename - rename a file/directory in the debugfs filesystem
373 * @old_dir: a pointer to the parent dentry for the renamed object. This
374 * should be a directory dentry.
375 * @old_dentry: dentry of an object to be renamed.
376 * @new_dir: a pointer to the parent dentry where the object should be
377 * moved. This should be a directory dentry.
378 * @new_name: a pointer to a string containing the target name.
379 *
380 * This function renames a file/directory in debugfs. The target must not
381 * exist for rename to succeed.
382 *
383 * This function will return a pointer to old_dentry (which is updated to
384 * reflect renaming) if it succeeds. If an error occurs, %NULL will be
385 * returned.
386 *
387 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
388 * returned.
389 */
390struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
391 struct dentry *new_dir, const char *new_name)
392{
393 int error;
394 struct dentry *dentry = NULL, *trap;
395 const char *old_name;
396
397 trap = lock_rename(new_dir, old_dir);
398 /* Source or destination directories don't exist? */
399 if (!old_dir->d_inode || !new_dir->d_inode)
400 goto exit;
401 /* Source does not exist, cyclic rename, or mountpoint? */
402 if (!old_dentry->d_inode || old_dentry == trap ||
403 d_mountpoint(old_dentry))
404 goto exit;
405 dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
406 /* Lookup failed, cyclic rename or target exists? */
407 if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
408 goto exit;
409
410 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
411
412 error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
413 dentry);
414 if (error) {
415 fsnotify_oldname_free(old_name);
416 goto exit;
417 }
418 d_move(old_dentry, dentry);
419 fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
420 old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
421 NULL, old_dentry->d_inode);
422 fsnotify_oldname_free(old_name);
423 unlock_rename(new_dir, old_dir);
424 dput(dentry);
425 return old_dentry;
426exit:
427 if (dentry && !IS_ERR(dentry))
428 dput(dentry);
429 unlock_rename(new_dir, old_dir);
430 return NULL;
431}
432EXPORT_SYMBOL_GPL(debugfs_rename);
433
371static decl_subsys(debug, NULL, NULL); 434static decl_subsys(debug, NULL, NULL);
372 435
373static int __init debugfs_init(void) 436static int __init debugfs_init(void)
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c927..02ca6f1e55d7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
840 goto out; 840 goto out;
841 } 841 }
842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); 842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
843 sysfs_attr_version.attr.owner = THIS_MODULE;
844 sysfs_attr_version_str.attr.owner = THIS_MODULE;
845 rc = do_sysfs_registration(); 843 rc = do_sysfs_registration();
846 if (rc) { 844 if (rc) {
847 printk(KERN_ERR "sysfs registration failed\n"); 845 printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93fc..572601e98dcd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
44 */ 44 */
45static struct nlm_host * 45static struct nlm_host *
46nlm_lookup_host(int server, const struct sockaddr_in *sin, 46nlm_lookup_host(int server, const struct sockaddr_in *sin,
47 int proto, int version, 47 int proto, int version, const char *hostname,
48 const char *hostname, 48 int hostname_len, const struct sockaddr_in *ssin)
49 int hostname_len)
50{ 49{
51 struct hlist_head *chain; 50 struct hlist_head *chain;
52 struct hlist_node *pos; 51 struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
54 struct nsm_handle *nsm = NULL; 53 struct nsm_handle *nsm = NULL;
55 int hash; 54 int hash;
56 55
57 dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", 56 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
57 ", p=%d, v=%d, my role=%s, name=%.*s)\n",
58 NIPQUAD(ssin->sin_addr.s_addr),
58 NIPQUAD(sin->sin_addr.s_addr), proto, version, 59 NIPQUAD(sin->sin_addr.s_addr), proto, version,
59 server? "server" : "client", 60 server? "server" : "client",
60 hostname_len, 61 hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
91 continue; 92 continue;
92 if (host->h_server != server) 93 if (host->h_server != server)
93 continue; 94 continue;
95 if (!nlm_cmp_addr(&host->h_saddr, ssin))
96 continue;
94 97
95 /* Move to head of hash chain. */ 98 /* Move to head of hash chain. */
96 hlist_del(&host->h_hash); 99 hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
118 host->h_name = nsm->sm_name; 121 host->h_name = nsm->sm_name;
119 host->h_addr = *sin; 122 host->h_addr = *sin;
120 host->h_addr.sin_port = 0; /* ouch! */ 123 host->h_addr.sin_port = 0; /* ouch! */
124 host->h_saddr = *ssin;
121 host->h_version = version; 125 host->h_version = version;
122 host->h_proto = proto; 126 host->h_proto = proto;
123 host->h_rpcclnt = NULL; 127 host->h_rpcclnt = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
161 */ 165 */
162 nsm_unmonitor(host); 166 nsm_unmonitor(host);
163 167
164 if ((clnt = host->h_rpcclnt) != NULL) { 168 clnt = host->h_rpcclnt;
165 if (atomic_read(&clnt->cl_users)) { 169 if (clnt != NULL)
166 printk(KERN_WARNING 170 rpc_shutdown_client(clnt);
167 "lockd: active RPC handle\n");
168 clnt->cl_dead = 1;
169 } else {
170 rpc_destroy_client(host->h_rpcclnt);
171 }
172 }
173 kfree(host); 171 kfree(host);
174} 172}
175 173
@@ -180,8 +178,10 @@ struct nlm_host *
180nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, 178nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
181 const char *hostname, int hostname_len) 179 const char *hostname, int hostname_len)
182{ 180{
181 struct sockaddr_in ssin = {0};
182
183 return nlm_lookup_host(0, sin, proto, version, 183 return nlm_lookup_host(0, sin, proto, version,
184 hostname, hostname_len); 184 hostname, hostname_len, &ssin);
185} 185}
186 186
187/* 187/*
@@ -191,9 +191,12 @@ struct nlm_host *
191nlmsvc_lookup_host(struct svc_rqst *rqstp, 191nlmsvc_lookup_host(struct svc_rqst *rqstp,
192 const char *hostname, int hostname_len) 192 const char *hostname, int hostname_len)
193{ 193{
194 struct sockaddr_in ssin = {0};
195
196 ssin.sin_addr = rqstp->rq_daddr.addr;
194 return nlm_lookup_host(1, svc_addr_in(rqstp), 197 return nlm_lookup_host(1, svc_addr_in(rqstp),
195 rqstp->rq_prot, rqstp->rq_vers, 198 rqstp->rq_prot, rqstp->rq_vers,
196 hostname, hostname_len); 199 hostname, hostname_len, &ssin);
197} 200}
198 201
199/* 202/*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
204{ 207{
205 struct rpc_clnt *clnt; 208 struct rpc_clnt *clnt;
206 209
207 dprintk("lockd: nlm_bind_host(%08x)\n", 210 dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
208 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 211 NIPQUAD(host->h_saddr.sin_addr),
212 NIPQUAD(host->h_addr.sin_addr));
209 213
210 /* Lock host handle */ 214 /* Lock host handle */
211 mutex_lock(&host->h_mutex); 215 mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
232 .protocol = host->h_proto, 236 .protocol = host->h_proto,
233 .address = (struct sockaddr *)&host->h_addr, 237 .address = (struct sockaddr *)&host->h_addr,
234 .addrsize = sizeof(host->h_addr), 238 .addrsize = sizeof(host->h_addr),
239 .saddress = (struct sockaddr *)&host->h_saddr,
235 .timeout = &timeparms, 240 .timeout = &timeparms,
236 .servername = host->h_name, 241 .servername = host->h_name,
237 .program = &nlm_program, 242 .program = &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134d..3353ed8421a7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
61 status); 61 status);
62 else 62 else
63 status = 0; 63 status = 0;
64 rpc_shutdown_client(clnt);
64 out: 65 out:
65 return status; 66 return status;
66} 67}
@@ -138,7 +139,6 @@ nsm_create(void)
138 .program = &nsm_program, 139 .program = &nsm_program,
139 .version = SM_VERSION, 140 .version = SM_VERSION,
140 .authflavor = RPC_AUTH_NULL, 141 .authflavor = RPC_AUTH_NULL,
141 .flags = (RPC_CLNT_CREATE_ONESHOT),
142 }; 142 };
143 143
144 return rpc_create(&args); 144 return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0e..26809325469c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
123 /* Process request with signals blocked, but allow SIGKILL. */ 123 /* Process request with signals blocked, but allow SIGKILL. */
124 allow_signal(SIGKILL); 124 allow_signal(SIGKILL);
125 125
126 /* kick rpciod */
127 rpciod_up();
128
129 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); 126 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
130 127
131 if (!nlm_timeout) 128 if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
202 /* Exit the RPC thread */ 199 /* Exit the RPC thread */
203 svc_exit_thread(rqstp); 200 svc_exit_thread(rqstp);
204 201
205 /* release rpciod */
206 rpciod_down();
207
208 /* Release module */ 202 /* Release module */
209 unlock_kernel(); 203 unlock_kernel();
210 module_put_and_exit(0); 204 module_put_and_exit(0);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef4..b55cb236cf74 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 pagelist.o proc.o read.o symlink.o unlink.o \ 8 pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o 9 write.o namespace.o mount_clnt.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa4900923..ccb455053ee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
102 int nfsversion) 102 int nfsversion)
103{ 103{
104 struct nfs_client *clp; 104 struct nfs_client *clp;
105 int error;
106 105
107 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) 106 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
108 goto error_0; 107 goto error_0;
109 108
110 error = rpciod_up();
111 if (error < 0) {
112 dprintk("%s: couldn't start rpciod! Error = %d\n",
113 __FUNCTION__, error);
114 goto error_1;
115 }
116 __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
117
118 if (nfsversion == 4) { 109 if (nfsversion == 4) {
119 if (nfs_callback_up() < 0) 110 if (nfs_callback_up() < 0)
120 goto error_2; 111 goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
139#ifdef CONFIG_NFS_V4 130#ifdef CONFIG_NFS_V4
140 init_rwsem(&clp->cl_sem); 131 init_rwsem(&clp->cl_sem);
141 INIT_LIST_HEAD(&clp->cl_delegations); 132 INIT_LIST_HEAD(&clp->cl_delegations);
142 INIT_LIST_HEAD(&clp->cl_state_owners);
143 INIT_LIST_HEAD(&clp->cl_unused);
144 spin_lock_init(&clp->cl_lock); 133 spin_lock_init(&clp->cl_lock);
145 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 134 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
146 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 135 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
154 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 143 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
155 nfs_callback_down(); 144 nfs_callback_down();
156error_2: 145error_2:
157 rpciod_down();
158 __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
159error_1:
160 kfree(clp); 146 kfree(clp);
161error_0: 147error_0:
162 return NULL; 148 return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
167#ifdef CONFIG_NFS_V4 153#ifdef CONFIG_NFS_V4
168 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) 154 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
169 nfs4_kill_renewd(clp); 155 nfs4_kill_renewd(clp);
170 while (!list_empty(&clp->cl_unused)) { 156 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
171 struct nfs4_state_owner *sp;
172
173 sp = list_entry(clp->cl_unused.next,
174 struct nfs4_state_owner,
175 so_list);
176 list_del(&sp->so_list);
177 kfree(sp);
178 }
179 BUG_ON(!list_empty(&clp->cl_state_owners));
180 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 157 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
181 nfs_idmap_delete(clp); 158 nfs_idmap_delete(clp);
182#endif 159#endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
198 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 175 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
199 nfs_callback_down(); 176 nfs_callback_down();
200 177
201 if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
202 rpciod_down();
203
204 kfree(clp->cl_hostname); 178 kfree(clp->cl_hostname);
205 kfree(clp); 179 kfree(clp);
206 180
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83f..20ac403469a0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
27 kfree(delegation); 27 kfree(delegation);
28} 28}
29 29
30static void nfs_free_delegation_callback(struct rcu_head *head)
31{
32 struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
33
34 nfs_free_delegation(delegation);
35}
36
30static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) 37static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
31{ 38{
32 struct inode *inode = state->inode; 39 struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
57 return status; 64 return status;
58} 65}
59 66
60static void nfs_delegation_claim_opens(struct inode *inode) 67static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
61{ 68{
62 struct nfs_inode *nfsi = NFS_I(inode); 69 struct nfs_inode *nfsi = NFS_I(inode);
63 struct nfs_open_context *ctx; 70 struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
72 continue; 79 continue;
73 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 80 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
74 continue; 81 continue;
82 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
83 continue;
75 get_nfs_open_context(ctx); 84 get_nfs_open_context(ctx);
76 spin_unlock(&inode->i_lock); 85 spin_unlock(&inode->i_lock);
77 err = nfs4_open_delegation_recall(ctx->dentry, state); 86 err = nfs4_open_delegation_recall(ctx, state, stateid);
78 if (err >= 0) 87 if (err >= 0)
79 err = nfs_delegation_claim_locks(ctx, state); 88 err = nfs_delegation_claim_locks(ctx, state);
80 put_nfs_open_context(ctx); 89 put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
115 struct nfs_delegation *delegation; 124 struct nfs_delegation *delegation;
116 int status = 0; 125 int status = 0;
117 126
118 /* Ensure we first revalidate the attributes and page cache! */
119 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
120 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
121
122 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); 127 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
123 if (delegation == NULL) 128 if (delegation == NULL)
124 return -ENOMEM; 129 return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
131 delegation->inode = inode; 136 delegation->inode = inode;
132 137
133 spin_lock(&clp->cl_lock); 138 spin_lock(&clp->cl_lock);
134 if (nfsi->delegation == NULL) { 139 if (rcu_dereference(nfsi->delegation) == NULL) {
135 list_add(&delegation->super_list, &clp->cl_delegations); 140 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
136 nfsi->delegation = delegation;
137 nfsi->delegation_state = delegation->type; 141 nfsi->delegation_state = delegation->type;
142 rcu_assign_pointer(nfsi->delegation, delegation);
138 delegation = NULL; 143 delegation = NULL;
139 } else { 144 } else {
140 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 145 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
145 status = -EIO; 150 status = -EIO;
146 } 151 }
147 } 152 }
153
154 /* Ensure we revalidate the attributes and page cache! */
155 spin_lock(&inode->i_lock);
156 nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
157 spin_unlock(&inode->i_lock);
158
148 spin_unlock(&clp->cl_lock); 159 spin_unlock(&clp->cl_lock);
149 kfree(delegation); 160 kfree(delegation);
150 return status; 161 return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
155 int res = 0; 166 int res = 0;
156 167
157 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); 168 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
158 nfs_free_delegation(delegation); 169 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
159 return res; 170 return res;
160} 171}
161 172
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
170/* 181/*
171 * Basic procedure for returning a delegation to the server 182 * Basic procedure for returning a delegation to the server
172 */ 183 */
173int __nfs_inode_return_delegation(struct inode *inode) 184static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
174{ 185{
175 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 186 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
176 struct nfs_inode *nfsi = NFS_I(inode); 187 struct nfs_inode *nfsi = NFS_I(inode);
177 struct nfs_delegation *delegation;
178 int res = 0;
179 188
180 nfs_msync_inode(inode); 189 nfs_msync_inode(inode);
181 down_read(&clp->cl_sem); 190 down_read(&clp->cl_sem);
182 /* Guard against new delegated open calls */ 191 /* Guard against new delegated open calls */
183 down_write(&nfsi->rwsem); 192 down_write(&nfsi->rwsem);
184 spin_lock(&clp->cl_lock); 193 nfs_delegation_claim_opens(inode, &delegation->stateid);
185 delegation = nfsi->delegation;
186 if (delegation != NULL) {
187 list_del_init(&delegation->super_list);
188 nfsi->delegation = NULL;
189 nfsi->delegation_state = 0;
190 }
191 spin_unlock(&clp->cl_lock);
192 nfs_delegation_claim_opens(inode);
193 up_write(&nfsi->rwsem); 194 up_write(&nfsi->rwsem);
194 up_read(&clp->cl_sem); 195 up_read(&clp->cl_sem);
195 nfs_msync_inode(inode); 196 nfs_msync_inode(inode);
196 197
197 if (delegation != NULL) 198 return nfs_do_return_delegation(inode, delegation);
198 res = nfs_do_return_delegation(inode, delegation); 199}
199 return res; 200
201static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
202{
203 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
204
205 if (delegation == NULL)
206 goto nomatch;
207 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
208 sizeof(delegation->stateid.data)) != 0)
209 goto nomatch;
210 list_del_rcu(&delegation->super_list);
211 nfsi->delegation_state = 0;
212 rcu_assign_pointer(nfsi->delegation, NULL);
213 return delegation;
214nomatch:
215 return NULL;
216}
217
218int nfs_inode_return_delegation(struct inode *inode)
219{
220 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
221 struct nfs_inode *nfsi = NFS_I(inode);
222 struct nfs_delegation *delegation;
223 int err = 0;
224
225 if (rcu_dereference(nfsi->delegation) != NULL) {
226 spin_lock(&clp->cl_lock);
227 delegation = nfs_detach_delegation_locked(nfsi, NULL);
228 spin_unlock(&clp->cl_lock);
229 if (delegation != NULL)
230 err = __nfs_inode_return_delegation(inode, delegation);
231 }
232 return err;
200} 233}
201 234
202/* 235/*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
211 if (clp == NULL) 244 if (clp == NULL)
212 return; 245 return;
213restart: 246restart:
214 spin_lock(&clp->cl_lock); 247 rcu_read_lock();
215 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 248 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
216 if (delegation->inode->i_sb != sb) 249 if (delegation->inode->i_sb != sb)
217 continue; 250 continue;
218 inode = igrab(delegation->inode); 251 inode = igrab(delegation->inode);
219 if (inode == NULL) 252 if (inode == NULL)
220 continue; 253 continue;
254 spin_lock(&clp->cl_lock);
255 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
221 spin_unlock(&clp->cl_lock); 256 spin_unlock(&clp->cl_lock);
222 nfs_inode_return_delegation(inode); 257 rcu_read_unlock();
258 if (delegation != NULL)
259 __nfs_inode_return_delegation(inode, delegation);
223 iput(inode); 260 iput(inode);
224 goto restart; 261 goto restart;
225 } 262 }
226 spin_unlock(&clp->cl_lock); 263 rcu_read_unlock();
227} 264}
228 265
229static int nfs_do_expire_all_delegations(void *ptr) 266static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
234 271
235 allow_signal(SIGKILL); 272 allow_signal(SIGKILL);
236restart: 273restart:
237 spin_lock(&clp->cl_lock);
238 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) 274 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
239 goto out; 275 goto out;
240 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) 276 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
241 goto out; 277 goto out;
242 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 278 rcu_read_lock();
279 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
243 inode = igrab(delegation->inode); 280 inode = igrab(delegation->inode);
244 if (inode == NULL) 281 if (inode == NULL)
245 continue; 282 continue;
283 spin_lock(&clp->cl_lock);
284 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
246 spin_unlock(&clp->cl_lock); 285 spin_unlock(&clp->cl_lock);
247 nfs_inode_return_delegation(inode); 286 rcu_read_unlock();
287 if (delegation)
288 __nfs_inode_return_delegation(inode, delegation);
248 iput(inode); 289 iput(inode);
249 goto restart; 290 goto restart;
250 } 291 }
292 rcu_read_unlock();
251out: 293out:
252 spin_unlock(&clp->cl_lock);
253 nfs_put_client(clp); 294 nfs_put_client(clp);
254 module_put_and_exit(0); 295 module_put_and_exit(0);
255} 296}
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
280 if (clp == NULL) 321 if (clp == NULL)
281 return; 322 return;
282restart: 323restart:
283 spin_lock(&clp->cl_lock); 324 rcu_read_lock();
284 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 325 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
285 inode = igrab(delegation->inode); 326 inode = igrab(delegation->inode);
286 if (inode == NULL) 327 if (inode == NULL)
287 continue; 328 continue;
329 spin_lock(&clp->cl_lock);
330 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
288 spin_unlock(&clp->cl_lock); 331 spin_unlock(&clp->cl_lock);
289 nfs_inode_return_delegation(inode); 332 rcu_read_unlock();
333 if (delegation != NULL)
334 __nfs_inode_return_delegation(inode, delegation);
290 iput(inode); 335 iput(inode);
291 goto restart; 336 goto restart;
292 } 337 }
293 spin_unlock(&clp->cl_lock); 338 rcu_read_unlock();
294} 339}
295 340
296struct recall_threadargs { 341struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
316 down_read(&clp->cl_sem); 361 down_read(&clp->cl_sem);
317 down_write(&nfsi->rwsem); 362 down_write(&nfsi->rwsem);
318 spin_lock(&clp->cl_lock); 363 spin_lock(&clp->cl_lock);
319 delegation = nfsi->delegation; 364 delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
320 if (delegation != NULL && memcmp(delegation->stateid.data, 365 if (delegation != NULL)
321 args->stateid->data,
322 sizeof(delegation->stateid.data)) == 0) {
323 list_del_init(&delegation->super_list);
324 nfsi->delegation = NULL;
325 nfsi->delegation_state = 0;
326 args->result = 0; 366 args->result = 0;
327 } else { 367 else
328 delegation = NULL;
329 args->result = -ENOENT; 368 args->result = -ENOENT;
330 }
331 spin_unlock(&clp->cl_lock); 369 spin_unlock(&clp->cl_lock);
332 complete(&args->started); 370 complete(&args->started);
333 nfs_delegation_claim_opens(inode); 371 nfs_delegation_claim_opens(inode, args->stateid);
334 up_write(&nfsi->rwsem); 372 up_write(&nfsi->rwsem);
335 up_read(&clp->cl_sem); 373 up_read(&clp->cl_sem);
336 nfs_msync_inode(inode); 374 nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
371{ 409{
372 struct nfs_delegation *delegation; 410 struct nfs_delegation *delegation;
373 struct inode *res = NULL; 411 struct inode *res = NULL;
374 spin_lock(&clp->cl_lock); 412 rcu_read_lock();
375 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 413 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
376 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 414 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
377 res = igrab(delegation->inode); 415 res = igrab(delegation->inode);
378 break; 416 break;
379 } 417 }
380 } 418 }
381 spin_unlock(&clp->cl_lock); 419 rcu_read_unlock();
382 return res; 420 return res;
383} 421}
384 422
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
388void nfs_delegation_mark_reclaim(struct nfs_client *clp) 426void nfs_delegation_mark_reclaim(struct nfs_client *clp)
389{ 427{
390 struct nfs_delegation *delegation; 428 struct nfs_delegation *delegation;
391 spin_lock(&clp->cl_lock); 429 rcu_read_lock();
392 list_for_each_entry(delegation, &clp->cl_delegations, super_list) 430 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
393 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; 431 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
394 spin_unlock(&clp->cl_lock); 432 rcu_read_unlock();
395} 433}
396 434
397/* 435/*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
399 */ 437 */
400void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 438void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
401{ 439{
402 struct nfs_delegation *delegation, *n; 440 struct nfs_delegation *delegation;
403 LIST_HEAD(head); 441restart:
404 spin_lock(&clp->cl_lock); 442 rcu_read_lock();
405 list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { 443 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
406 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) 444 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
407 continue; 445 continue;
408 list_move(&delegation->super_list, &head); 446 spin_lock(&clp->cl_lock);
409 NFS_I(delegation->inode)->delegation = NULL; 447 delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
410 NFS_I(delegation->inode)->delegation_state = 0; 448 spin_unlock(&clp->cl_lock);
411 } 449 rcu_read_unlock();
412 spin_unlock(&clp->cl_lock); 450 if (delegation != NULL)
413 while(!list_empty(&head)) { 451 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
414 delegation = list_entry(head.next, struct nfs_delegation, super_list); 452 goto restart;
415 list_del(&delegation->super_list);
416 nfs_free_delegation(delegation);
417 } 453 }
454 rcu_read_unlock();
418} 455}
419 456
420int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 457int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
421{ 458{
422 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
423 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
424 struct nfs_delegation *delegation; 460 struct nfs_delegation *delegation;
425 int res = 0; 461 int ret = 0;
426 462
427 if (nfsi->delegation_state == 0) 463 rcu_read_lock();
428 return 0; 464 delegation = rcu_dereference(nfsi->delegation);
429 spin_lock(&clp->cl_lock);
430 delegation = nfsi->delegation;
431 if (delegation != NULL) { 465 if (delegation != NULL) {
432 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); 466 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
433 res = 1; 467 ret = 1;
434 } 468 }
435 spin_unlock(&clp->cl_lock); 469 rcu_read_unlock();
436 return res; 470 return ret;
437} 471}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7fe..5874ce7fdbae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
22 long flags; 22 long flags;
23 loff_t maxsize; 23 loff_t maxsize;
24 __u64 change_attr; 24 __u64 change_attr;
25 struct rcu_head rcu;
25}; 26};
26 27
27int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 28int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
28void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 29void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
29int __nfs_inode_return_delegation(struct inode *inode); 30int nfs_inode_return_delegation(struct inode *inode);
30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 31int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
31 32
32struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 33struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
39 40
40/* NFSv4 delegation-related procedures */ 41/* NFSv4 delegation-related procedures */
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 42int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
42int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); 43int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
43int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 44int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
44int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 45int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
45 46
46static inline int nfs_have_delegation(struct inode *inode, int flags) 47static inline int nfs_have_delegation(struct inode *inode, int flags)
47{ 48{
49 struct nfs_delegation *delegation;
50 int ret = 0;
51
48 flags &= FMODE_READ|FMODE_WRITE; 52 flags &= FMODE_READ|FMODE_WRITE;
49 smp_rmb(); 53 rcu_read_lock();
50 if ((NFS_I(inode)->delegation_state & flags) == flags) 54 delegation = rcu_dereference(NFS_I(inode)->delegation);
51 return 1; 55 if (delegation != NULL && (delegation->type & flags) == flags)
52 return 0; 56 ret = 1;
57 rcu_read_unlock();
58 return ret;
53} 59}
54 60
55static inline int nfs_inode_return_delegation(struct inode *inode)
56{
57 int err = 0;
58
59 if (NFS_I(inode)->delegation != NULL)
60 err = __nfs_inode_return_delegation(inode);
61 return err;
62}
63#else 61#else
64static inline int nfs_have_delegation(struct inode *inode, int flags) 62static inline int nfs_have_delegation(struct inode *inode, int flags)
65{ 63{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e1..322141f4ab48 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
897 return (nd->intent.open.flags & O_EXCL) != 0; 897 return (nd->intent.open.flags & O_EXCL) != 0;
898} 898}
899 899
900static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, 900static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
901 struct nfs_fh *fh, struct nfs_fattr *fattr)
902{ 901{
903 struct nfs_server *server = NFS_SERVER(dir); 902 struct nfs_server *server = NFS_SERVER(dir);
904 903
905 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) 904 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
906 /* Revalidate fsid on root dir */ 905 /* Revalidate fsid using the parent directory */
907 return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); 906 return __nfs_revalidate_inode(server, dir);
908 return 0; 907 return 0;
909} 908}
910 909
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
946 res = ERR_PTR(error); 945 res = ERR_PTR(error);
947 goto out_unlock; 946 goto out_unlock;
948 } 947 }
949 error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); 948 error = nfs_reval_fsid(dir, &fattr);
950 if (error < 0) { 949 if (error < 0) {
951 res = ERR_PTR(error); 950 res = ERR_PTR(error);
952 goto out_unlock; 951 goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1244 attr.ia_mode = mode; 1243 attr.ia_mode = mode;
1245 attr.ia_valid = ATTR_MODE; 1244 attr.ia_valid = ATTR_MODE;
1246 1245
1247 if (nd && (nd->flags & LOOKUP_CREATE)) 1246 if ((nd->flags & LOOKUP_CREATE) != 0)
1248 open_flags = nd->intent.open.flags; 1247 open_flags = nd->intent.open.flags;
1249 1248
1250 lock_kernel(); 1249 lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1535 1534
1536 lock_kernel(); 1535 lock_kernel();
1537 1536
1538 page = alloc_page(GFP_KERNEL); 1537 page = alloc_page(GFP_HIGHUSER);
1539 if (!page) { 1538 if (!page) {
1540 unlock_kernel(); 1539 unlock_kernel();
1541 return -ENOMEM; 1540 return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1744 struct nfs_inode *nfsi; 1743 struct nfs_inode *nfsi;
1745 struct nfs_access_entry *cache; 1744 struct nfs_access_entry *cache;
1746 1745
1747 spin_lock(&nfs_access_lru_lock);
1748restart: 1746restart:
1747 spin_lock(&nfs_access_lru_lock);
1749 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1748 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1750 struct inode *inode; 1749 struct inode *inode;
1751 1750
@@ -1770,6 +1769,7 @@ remove_lru_entry:
1770 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 1769 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1771 } 1770 }
1772 spin_unlock(&inode->i_lock); 1771 spin_unlock(&inode->i_lock);
1772 spin_unlock(&nfs_access_lru_lock);
1773 iput(inode); 1773 iput(inode);
1774 goto restart; 1774 goto restart;
1775 } 1775 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510fe..a5c82b6f3b45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) 266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
267{ 267{
268 struct nfs_open_context *ctx = dreq->ctx; 268 struct nfs_open_context *ctx = dreq->ctx;
269 struct inode *inode = ctx->dentry->d_inode; 269 struct inode *inode = ctx->path.dentry->d_inode;
270 size_t rsize = NFS_SERVER(inode)->rsize; 270 size_t rsize = NFS_SERVER(inode)->rsize;
271 unsigned int pgbase; 271 unsigned int pgbase;
272 int result; 272 int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
295 break; 295 break;
296 } 296 }
297 if ((unsigned)result < data->npages) { 297 if ((unsigned)result < data->npages) {
298 nfs_direct_release_pages(data->pagevec, result); 298 bytes = result * PAGE_SIZE;
299 nfs_readdata_release(data); 299 if (bytes <= pgbase) {
300 break; 300 nfs_direct_release_pages(data->pagevec, result);
301 nfs_readdata_release(data);
302 break;
303 }
304 bytes -= pgbase;
305 data->npages = result;
301 } 306 }
302 307
303 get_dreq(dreq); 308 get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
601static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) 606static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
602{ 607{
603 struct nfs_open_context *ctx = dreq->ctx; 608 struct nfs_open_context *ctx = dreq->ctx;
604 struct inode *inode = ctx->dentry->d_inode; 609 struct inode *inode = ctx->path.dentry->d_inode;
605 size_t wsize = NFS_SERVER(inode)->wsize; 610 size_t wsize = NFS_SERVER(inode)->wsize;
606 unsigned int pgbase; 611 unsigned int pgbase;
607 int result; 612 int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
630 break; 635 break;
631 } 636 }
632 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < data->npages) {
633 nfs_direct_release_pages(data->pagevec, result); 638 bytes = result * PAGE_SIZE;
634 nfs_writedata_release(data); 639 if (bytes <= pgbase) {
635 break; 640 nfs_direct_release_pages(data->pagevec, result);
641 nfs_writedata_release(data);
642 break;
643 }
644 bytes -= pgbase;
645 data->npages = result;
636 } 646 }
637 647
638 get_dreq(dreq); 648 get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
763 (unsigned long) count, (long long) pos); 773 (unsigned long) count, (long long) pos);
764 774
765 if (nr_segs != 1) 775 if (nr_segs != 1)
766 return -EINVAL;
767
768 if (count < 0)
769 goto out; 776 goto out;
777
770 retval = -EFAULT; 778 retval = -EFAULT;
771 if (!access_ok(VERIFY_WRITE, buf, count)) 779 if (!access_ok(VERIFY_WRITE, buf, count))
772 goto out; 780 goto out;
@@ -814,7 +822,7 @@ out:
814ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 822ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
815 unsigned long nr_segs, loff_t pos) 823 unsigned long nr_segs, loff_t pos)
816{ 824{
817 ssize_t retval; 825 ssize_t retval = -EINVAL;
818 struct file *file = iocb->ki_filp; 826 struct file *file = iocb->ki_filp;
819 struct address_space *mapping = file->f_mapping; 827 struct address_space *mapping = file->f_mapping;
820 /* XXX: temporary */ 828 /* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
827 (unsigned long) count, (long long) pos); 835 (unsigned long) count, (long long) pos);
828 836
829 if (nr_segs != 1) 837 if (nr_segs != 1)
830 return -EINVAL; 838 goto out;
831 839
832 retval = generic_write_checks(file, &pos, &count, 0); 840 retval = generic_write_checks(file, &pos, &count, 0);
833 if (retval) 841 if (retval)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a836592..3d9fccf4ef93 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
461 461
462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
463 if (ctx != NULL) { 463 if (ctx != NULL) {
464 atomic_set(&ctx->count, 1); 464 ctx->path.dentry = dget(dentry);
465 ctx->dentry = dget(dentry); 465 ctx->path.mnt = mntget(mnt);
466 ctx->vfsmnt = mntget(mnt);
467 ctx->cred = get_rpccred(cred); 466 ctx->cred = get_rpccred(cred);
468 ctx->state = NULL; 467 ctx->state = NULL;
469 ctx->lockowner = current->files; 468 ctx->lockowner = current->files;
470 ctx->error = 0; 469 ctx->error = 0;
471 ctx->dir_cookie = 0; 470 ctx->dir_cookie = 0;
471 kref_init(&ctx->kref);
472 } 472 }
473 return ctx; 473 return ctx;
474} 474}
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
477{ 477{
478 if (ctx != NULL) 478 if (ctx != NULL)
479 atomic_inc(&ctx->count); 479 kref_get(&ctx->kref);
480 return ctx; 480 return ctx;
481} 481}
482 482
483void put_nfs_open_context(struct nfs_open_context *ctx) 483static void nfs_free_open_context(struct kref *kref)
484{ 484{
485 if (atomic_dec_and_test(&ctx->count)) { 485 struct nfs_open_context *ctx = container_of(kref,
486 if (!list_empty(&ctx->list)) { 486 struct nfs_open_context, kref);
487 struct inode *inode = ctx->dentry->d_inode; 487
488 spin_lock(&inode->i_lock); 488 if (!list_empty(&ctx->list)) {
489 list_del(&ctx->list); 489 struct inode *inode = ctx->path.dentry->d_inode;
490 spin_unlock(&inode->i_lock); 490 spin_lock(&inode->i_lock);
491 } 491 list_del(&ctx->list);
492 if (ctx->state != NULL) 492 spin_unlock(&inode->i_lock);
493 nfs4_close_state(ctx->state, ctx->mode);
494 if (ctx->cred != NULL)
495 put_rpccred(ctx->cred);
496 dput(ctx->dentry);
497 mntput(ctx->vfsmnt);
498 kfree(ctx);
499 } 493 }
494 if (ctx->state != NULL)
495 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
496 if (ctx->cred != NULL)
497 put_rpccred(ctx->cred);
498 dput(ctx->path.dentry);
499 mntput(ctx->path.mnt);
500 kfree(ctx);
501}
502
503void put_nfs_open_context(struct nfs_open_context *ctx)
504{
505 kref_put(&ctx->kref, nfs_free_open_context);
500} 506}
501 507
502/* 508/*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
961 goto out_changed; 967 goto out_changed;
962 968
963 server = NFS_SERVER(inode); 969 server = NFS_SERVER(inode);
964 /* Update the fsid if and only if this is the root directory */ 970 /* Update the fsid? */
965 if (inode == inode->i_sb->s_root->d_inode 971 if (S_ISDIR(inode->i_mode)
966 && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) 972 && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
967 server->fsid = fattr->fsid; 973 server->fsid = fattr->fsid;
968 974
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1066 invalid &= ~NFS_INO_INVALID_DATA; 1072 invalid &= ~NFS_INO_INVALID_DATA;
1067 if (data_stable) 1073 if (data_stable)
1068 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); 1074 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
1069 if (!nfs_have_delegation(inode, FMODE_READ)) 1075 if (!nfs_have_delegation(inode, FMODE_READ) ||
1076 (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
1070 nfsi->cache_validity |= invalid; 1077 nfsi->cache_validity |= invalid;
1078 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1071 1079
1072 return 0; 1080 return 0;
1073 out_changed: 1081 out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1103 */ 1111 */
1104void nfs4_clear_inode(struct inode *inode) 1112void nfs4_clear_inode(struct inode *inode)
1105{ 1113{
1106 struct nfs_inode *nfsi = NFS_I(inode);
1107
1108 /* If we are holding a delegation, return it! */ 1114 /* If we are holding a delegation, return it! */
1109 nfs_inode_return_delegation(inode); 1115 nfs_inode_return_delegation(inode);
1110 /* First call standard NFS clear_inode() code */ 1116 /* First call standard NFS clear_inode() code */
1111 nfs_clear_inode(inode); 1117 nfs_clear_inode(inode);
1112 /* Now clear out any remaining state */
1113 while (!list_empty(&nfsi->open_states)) {
1114 struct nfs4_state *state;
1115
1116 state = list_entry(nfsi->open_states.next,
1117 struct nfs4_state,
1118 inode_states);
1119 dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
1120 __FUNCTION__,
1121 inode->i_sb->s_id,
1122 (long long)NFS_FILEID(inode),
1123 state);
1124 BUG_ON(atomic_read(&state->count) != 1);
1125 nfs4_close_state(state, state->state);
1126 }
1127} 1118}
1128#endif 1119#endif
1129 1120
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1165 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1156 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1166 1157
1167 inode_init_once(&nfsi->vfs_inode); 1158 inode_init_once(&nfsi->vfs_inode);
1168 spin_lock_init(&nfsi->req_lock);
1169 INIT_LIST_HEAD(&nfsi->dirty);
1170 INIT_LIST_HEAD(&nfsi->commit);
1171 INIT_LIST_HEAD(&nfsi->open_files); 1159 INIT_LIST_HEAD(&nfsi->open_files);
1172 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1160 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1173 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1161 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1174 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1162 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1175 atomic_set(&nfsi->data_updates, 0); 1163 atomic_set(&nfsi->data_updates, 0);
1176 nfsi->ndirty = 0;
1177 nfsi->ncommit = 0; 1164 nfsi->ncommit = 0;
1178 nfsi->npages = 0; 1165 nfsi->npages = 0;
1179 nfs4_init_once(nfsi); 1166 nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e65..76cf55d57101 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
183/* 183/*
184 * Calculate the number of 512byte blocks used. 184 * Calculate the number of 512byte blocks used.
185 */ 185 */
186static inline unsigned long nfs_calc_block_size(u64 tsize) 186static inline blkcnt_t nfs_calc_block_size(u64 tsize)
187{ 187{
188 loff_t used = (tsize + 511) >> 9; 188 blkcnt_t used = (tsize + 511) >> 9;
189 return (used > ULONG_MAX) ? ULONG_MAX : used; 189 return (used > ULONG_MAX) ? ULONG_MAX : used;
190} 190}
191 191
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a3140..8afd9f7e7a97 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
1/* 1/*
2 * linux/fs/nfs/mount_clnt.c 2 * In-kernel MOUNT protocol client
3 *
4 * MOUNT client to support NFSroot.
5 * 3 *
6 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
@@ -18,33 +16,31 @@
18#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
19 17
20#ifdef RPC_DEBUG 18#ifdef RPC_DEBUG
21# define NFSDBG_FACILITY NFSDBG_ROOT 19# define NFSDBG_FACILITY NFSDBG_MOUNT
22#endif 20#endif
23 21
24/*
25#define MOUNT_PROGRAM 100005
26#define MOUNT_VERSION 1
27#define MOUNT_MNT 1
28#define MOUNT_UMNT 3
29 */
30
31static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
32 int, int);
33static struct rpc_program mnt_program; 22static struct rpc_program mnt_program;
34 23
35struct mnt_fhstatus { 24struct mnt_fhstatus {
36 unsigned int status; 25 u32 status;
37 struct nfs_fh * fh; 26 struct nfs_fh *fh;
38}; 27};
39 28
40/* 29/**
41 * Obtain an NFS file handle for the given host and path 30 * nfs_mount - Obtain an NFS file handle for the given host and path
31 * @addr: pointer to server's address
32 * @len: size of server's address
33 * @hostname: name of server host, or NULL
34 * @path: pointer to string containing export path to mount
35 * @version: mount version to use for this request
36 * @protocol: transport protocol to use for thie request
37 * @fh: pointer to location to place returned file handle
38 *
39 * Uses default timeout parameters specified by underlying transport.
42 */ 40 */
43int 41int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
44nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, 42 int version, int protocol, struct nfs_fh *fh)
45 int version, int protocol)
46{ 43{
47 struct rpc_clnt *mnt_clnt;
48 struct mnt_fhstatus result = { 44 struct mnt_fhstatus result = {
49 .fh = fh 45 .fh = fh
50 }; 46 };
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
52 .rpc_argp = path, 48 .rpc_argp = path,
53 .rpc_resp = &result, 49 .rpc_resp = &result,
54 }; 50 };
55 char hostname[32]; 51 struct rpc_create_args args = {
52 .protocol = protocol,
53 .address = addr,
54 .addrsize = len,
55 .servername = hostname,
56 .program = &mnt_program,
57 .version = version,
58 .authflavor = RPC_AUTH_UNIX,
59 .flags = RPC_CLNT_CREATE_INTR,
60 };
61 struct rpc_clnt *mnt_clnt;
56 int status; 62 int status;
57 63
58 dprintk("NFS: nfs_mount(%08x:%s)\n", 64 dprintk("NFS: sending MNT request for %s:%s\n",
59 (unsigned)ntohl(addr->sin_addr.s_addr), path); 65 (hostname ? hostname : "server"), path);
60 66
61 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); 67 mnt_clnt = rpc_create(&args);
62 mnt_clnt = mnt_create(hostname, addr, version, protocol);
63 if (IS_ERR(mnt_clnt)) 68 if (IS_ERR(mnt_clnt))
64 return PTR_ERR(mnt_clnt); 69 goto out_clnt_err;
65 70
66 if (version == NFS_MNT3_VERSION) 71 if (version == NFS_MNT3_VERSION)
67 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 72 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 74 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
70 75
71 status = rpc_call_sync(mnt_clnt, &msg, 0); 76 status = rpc_call_sync(mnt_clnt, &msg, 0);
72 return status < 0? status : (result.status? -EACCES : 0); 77 rpc_shutdown_client(mnt_clnt);
73}
74 78
75static struct rpc_clnt * 79 if (status < 0)
76mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, 80 goto out_call_err;
77 int protocol) 81 if (result.status != 0)
78{ 82 goto out_mnt_err;
79 struct rpc_create_args args = { 83
80 .protocol = protocol, 84 dprintk("NFS: MNT request succeeded\n");
81 .address = (struct sockaddr *)srvaddr, 85 status = 0;
82 .addrsize = sizeof(*srvaddr), 86
83 .servername = hostname, 87out:
84 .program = &mnt_program, 88 return status;
85 .version = version, 89
86 .authflavor = RPC_AUTH_UNIX, 90out_clnt_err:
87 .flags = (RPC_CLNT_CREATE_ONESHOT | 91 status = PTR_ERR(mnt_clnt);
88 RPC_CLNT_CREATE_INTR), 92 dprintk("NFS: failed to create RPC client, status=%d\n", status);
89 }; 93 goto out;
94
95out_call_err:
96 dprintk("NFS: failed to start MNT request, status=%d\n", status);
97 goto out;
90 98
91 return rpc_create(&args); 99out_mnt_err:
100 dprintk("NFS: MNT server returned result %d\n", result.status);
101 status = -EACCES;
102 goto out;
92} 103}
93 104
94/* 105/*
95 * XDR encode/decode functions for MOUNT 106 * XDR encode/decode functions for MOUNT
96 */ 107 */
97static int 108static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
98xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) 109 const char *path)
99{ 110{
100 p = xdr_encode_string(p, path); 111 p = xdr_encode_string(p, path);
101 112
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
103 return 0; 114 return 0;
104} 115}
105 116
106static int 117static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
107xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 118 struct mnt_fhstatus *res)
108{ 119{
109 struct nfs_fh *fh = res->fh; 120 struct nfs_fh *fh = res->fh;
110 121
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
115 return 0; 126 return 0;
116} 127}
117 128
118static int 129static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
119xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 130 struct mnt_fhstatus *res)
120{ 131{
121 struct nfs_fh *fh = res->fh; 132 struct nfs_fh *fh = res->fh;
122 133
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
135#define MNT_fhstatus_sz (1 + 8) 146#define MNT_fhstatus_sz (1 + 8)
136#define MNT_fhstatus3_sz (1 + 16) 147#define MNT_fhstatus3_sz (1 + 16)
137 148
138static struct rpc_procinfo mnt_procedures[] = { 149static struct rpc_procinfo mnt_procedures[] = {
139[MNTPROC_MNT] = { 150 [MNTPROC_MNT] = {
140 .p_proc = MNTPROC_MNT, 151 .p_proc = MNTPROC_MNT,
141 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 152 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
142 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 153 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
143 .p_arglen = MNT_dirpath_sz, 154 .p_arglen = MNT_dirpath_sz,
144 .p_replen = MNT_fhstatus_sz, 155 .p_replen = MNT_fhstatus_sz,
145 .p_statidx = MNTPROC_MNT, 156 .p_statidx = MNTPROC_MNT,
146 .p_name = "MOUNT", 157 .p_name = "MOUNT",
147 }, 158 },
148}; 159};
149 160
150static struct rpc_procinfo mnt3_procedures[] = { 161static struct rpc_procinfo mnt3_procedures[] = {
151[MOUNTPROC3_MNT] = { 162 [MOUNTPROC3_MNT] = {
152 .p_proc = MOUNTPROC3_MNT, 163 .p_proc = MOUNTPROC3_MNT,
153 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 164 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
154 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 165 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
155 .p_arglen = MNT_dirpath_sz, 166 .p_arglen = MNT_dirpath_sz,
156 .p_replen = MNT_fhstatus3_sz, 167 .p_replen = MNT_fhstatus3_sz,
157 .p_statidx = MOUNTPROC3_MNT, 168 .p_statidx = MOUNTPROC3_MNT,
158 .p_name = "MOUNT", 169 .p_name = "MOUNT",
159 }, 170 },
160}; 171};
161 172
162 173
163static struct rpc_version mnt_version1 = { 174static struct rpc_version mnt_version1 = {
164 .number = 1, 175 .number = 1,
165 .nrprocs = 2, 176 .nrprocs = 2,
166 .procs = mnt_procedures 177 .procs = mnt_procedures,
167}; 178};
168 179
169static struct rpc_version mnt_version3 = { 180static struct rpc_version mnt_version3 = {
170 .number = 3, 181 .number = 3,
171 .nrprocs = 2, 182 .nrprocs = 2,
172 .procs = mnt3_procedures 183 .procs = mnt3_procedures,
173}; 184};
174 185
175static struct rpc_version * mnt_version[] = { 186static struct rpc_version *mnt_version[] = {
176 NULL, 187 NULL,
177 &mnt_version1, 188 &mnt_version1,
178 NULL, 189 NULL,
179 &mnt_version3, 190 &mnt_version3,
180}; 191};
181 192
182static struct rpc_stat mnt_stats; 193static struct rpc_stat mnt_stats;
183 194
184static struct rpc_program mnt_program = { 195static struct rpc_program mnt_program = {
185 .name = "mount", 196 .name = "mount",
186 .number = NFS_MNT_PROGRAM, 197 .number = NFS_MNT_PROGRAM,
187 .nrvers = ARRAY_SIZE(mnt_version), 198 .nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3db..7fcc78f2aa71 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
223static int 223static int
224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
225{ 225{
226 struct rpc_auth *auth = req->rq_task->tk_auth; 226 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
227 unsigned int replen; 227 unsigned int replen;
228 u32 offset = (u32)args->offset; 228 u32 offset = (u32)args->offset;
229 u32 count = args->count; 229 u32 count = args->count;
@@ -380,7 +380,7 @@ static int
380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
381{ 381{
382 struct rpc_task *task = req->rq_task; 382 struct rpc_task *task = req->rq_task;
383 struct rpc_auth *auth = task->tk_auth; 383 struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
384 unsigned int replen; 384 unsigned int replen;
385 u32 count = args->count; 385 u32 count = args->count;
386 386
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
541static int 541static int
542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) 542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
543{ 543{
544 struct rpc_auth *auth = req->rq_task->tk_auth; 544 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
545 unsigned int replen; 545 unsigned int replen;
546 546
547 p = xdr_encode_fhandle(p, args->fh); 547 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2e..814d886b6aa4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
335 * not sure this buys us anything (and I'd have 335 * not sure this buys us anything (and I'd have
336 * to revamp the NFSv3 XDR code) */ 336 * to revamp the NFSv3 XDR code) */
337 status = nfs3_proc_setattr(dentry, &fattr, sattr); 337 status = nfs3_proc_setattr(dentry, &fattr, sattr);
338 if (status == 0) 338 nfs_post_op_update_inode(dentry->d_inode, &fattr);
339 nfs_setattr_update_inode(dentry->d_inode, sattr);
340 nfs_refresh_inode(dentry->d_inode, &fattr);
341 dprintk("NFS reply setattr (post-create): %d\n", status); 339 dprintk("NFS reply setattr (post-create): %d\n", status);
342 } 340 }
343 if (status != 0) 341 if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f01..b4647a22f349 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
319static int 319static int
320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
321{ 321{
322 struct rpc_auth *auth = req->rq_task->tk_auth; 322 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
323 unsigned int replen; 323 unsigned int replen;
324 u32 count = args->count; 324 u32 count = args->count;
325 325
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
458static int 458static int
459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
460{ 460{
461 struct rpc_auth *auth = req->rq_task->tk_auth; 461 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
462 unsigned int replen; 462 unsigned int replen;
463 u32 count = args->count; 463 u32 count = args->count;
464 464
@@ -643,7 +643,7 @@ static int
643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, 643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
644 struct nfs3_getaclargs *args) 644 struct nfs3_getaclargs *args)
645{ 645{
646 struct rpc_auth *auth = req->rq_task->tk_auth; 646 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
647 unsigned int replen; 647 unsigned int replen;
648 648
649 p = xdr_encode_fhandle(p, args->fh); 649 p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
773static int 773static int
774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
775{ 775{
776 struct rpc_auth *auth = req->rq_task->tk_auth; 776 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
777 unsigned int replen; 777 unsigned int replen;
778 778
779 p = xdr_encode_fhandle(p, args->fh); 779 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c09..6c028e734fe6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
70 seqid->flags |= NFS_SEQID_CONFIRMED; 70 seqid->flags |= NFS_SEQID_CONFIRMED;
71} 71}
72 72
73struct nfs_unique_id {
74 struct rb_node rb_node;
75 __u64 id;
76};
77
73/* 78/*
74 * NFS4 state_owners and lock_owners are simply labels for ordered 79 * NFS4 state_owners and lock_owners are simply labels for ordered
75 * sequences of RPC calls. Their sole purpose is to provide once-only 80 * sequences of RPC calls. Their sole purpose is to provide once-only
76 * semantics by allowing the server to identify replayed requests. 81 * semantics by allowing the server to identify replayed requests.
77 */ 82 */
78struct nfs4_state_owner { 83struct nfs4_state_owner {
79 spinlock_t so_lock; 84 struct nfs_unique_id so_owner_id;
80 struct list_head so_list; /* per-clientid list of state_owners */
81 struct nfs_client *so_client; 85 struct nfs_client *so_client;
82 u32 so_id; /* 32-bit identifier, unique */ 86 struct nfs_server *so_server;
83 atomic_t so_count; 87 struct rb_node so_client_node;
84 88
85 struct rpc_cred *so_cred; /* Associated cred */ 89 struct rpc_cred *so_cred; /* Associated cred */
90
91 spinlock_t so_lock;
92 atomic_t so_count;
86 struct list_head so_states; 93 struct list_head so_states;
87 struct list_head so_delegations; 94 struct list_head so_delegations;
88 struct nfs_seqid_counter so_seqid; 95 struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
108#define NFS_LOCK_INITIALIZED 1 115#define NFS_LOCK_INITIALIZED 1
109 int ls_flags; 116 int ls_flags;
110 struct nfs_seqid_counter ls_seqid; 117 struct nfs_seqid_counter ls_seqid;
111 u32 ls_id; 118 struct nfs_unique_id ls_id;
112 nfs4_stateid ls_stateid; 119 nfs4_stateid ls_stateid;
113 atomic_t ls_count; 120 atomic_t ls_count;
114}; 121};
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
116/* bits for nfs4_state->flags */ 123/* bits for nfs4_state->flags */
117enum { 124enum {
118 LK_STATE_IN_USE, 125 LK_STATE_IN_USE,
119 NFS_DELEGATED_STATE, 126 NFS_DELEGATED_STATE, /* Current stateid is delegation */
127 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
128 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
129 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
120}; 130};
121 131
122struct nfs4_state { 132struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
130 unsigned long flags; /* Do we hold any locks? */ 140 unsigned long flags; /* Do we hold any locks? */
131 spinlock_t state_lock; /* Protects the lock_states list */ 141 spinlock_t state_lock; /* Protects the lock_states list */
132 142
133 nfs4_stateid stateid; 143 seqlock_t seqlock; /* Protects the stateid/open_stateid */
144 nfs4_stateid stateid; /* Current stateid: may be delegation */
145 nfs4_stateid open_stateid; /* OPEN stateid */
134 146
135 unsigned int n_rdonly; 147 /* The following 3 fields are protected by owner->so_lock */
136 unsigned int n_wronly; 148 unsigned int n_rdonly; /* Number of read-only references */
137 unsigned int n_rdwr; 149 unsigned int n_wronly; /* Number of write-only references */
150 unsigned int n_rdwr; /* Number of read/write references */
138 int state; /* State on the server (R,W, or RW) */ 151 int state; /* State on the server (R,W, or RW) */
139 atomic_t count; 152 atomic_t count;
140}; 153};
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
165extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 178extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
166extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 179extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
167extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 180extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
168extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 181extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
169extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 182extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
170extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 183extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
171extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 184extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
189 202
190/* nfs4state.c */ 203/* nfs4state.c */
191struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); 204struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
192extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
193 205
194extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 206extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
195extern void nfs4_put_state_owner(struct nfs4_state_owner *); 207extern void nfs4_put_state_owner(struct nfs4_state_owner *);
196extern void nfs4_drop_state_owner(struct nfs4_state_owner *); 208extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
197extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 209extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
198extern void nfs4_put_open_state(struct nfs4_state *); 210extern void nfs4_put_open_state(struct nfs4_state *);
199extern void nfs4_close_state(struct nfs4_state *, mode_t); 211extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
200extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 212extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
201extern void nfs4_schedule_state_recovery(struct nfs_client *); 213extern void nfs4_schedule_state_recovery(struct nfs_client *);
202extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 214extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
222 234
223#else 235#else
224 236
225#define nfs4_close_state(a, b) do { } while (0) 237#define nfs4_close_state(a, b, c) do { } while (0)
226 238
227#endif /* CONFIG_NFS_V4 */ 239#endif /* CONFIG_NFS_V4 */
228#endif /* __LINUX_FS_NFS_NFS4_FS.H */ 240#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90e..fee2da856c95 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
68 69
69/* Prevent leaks of NFSv4 errors into userland */ 70/* Prevent leaks of NFSv4 errors into userland */
70int nfs4_map_errors(int err) 71int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
214} 215}
215 216
216struct nfs4_opendata { 217struct nfs4_opendata {
217 atomic_t count; 218 struct kref kref;
218 struct nfs_openargs o_arg; 219 struct nfs_openargs o_arg;
219 struct nfs_openres o_res; 220 struct nfs_openres o_res;
220 struct nfs_open_confirmargs c_arg; 221 struct nfs_open_confirmargs c_arg;
221 struct nfs_open_confirmres c_res; 222 struct nfs_open_confirmres c_res;
222 struct nfs_fattr f_attr; 223 struct nfs_fattr f_attr;
223 struct nfs_fattr dir_attr; 224 struct nfs_fattr dir_attr;
224 struct dentry *dentry; 225 struct path path;
225 struct dentry *dir; 226 struct dentry *dir;
226 struct nfs4_state_owner *owner; 227 struct nfs4_state_owner *owner;
228 struct nfs4_state *state;
227 struct iattr attrs; 229 struct iattr attrs;
228 unsigned long timestamp; 230 unsigned long timestamp;
231 unsigned int rpc_done : 1;
229 int rpc_status; 232 int rpc_status;
230 int cancelled; 233 int cancelled;
231}; 234};
232 235
233static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, 236
237static void nfs4_init_opendata_res(struct nfs4_opendata *p)
238{
239 p->o_res.f_attr = &p->f_attr;
240 p->o_res.dir_attr = &p->dir_attr;
241 p->o_res.server = p->o_arg.server;
242 nfs_fattr_init(&p->f_attr);
243 nfs_fattr_init(&p->dir_attr);
244}
245
246static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
234 struct nfs4_state_owner *sp, int flags, 247 struct nfs4_state_owner *sp, int flags,
235 const struct iattr *attrs) 248 const struct iattr *attrs)
236{ 249{
237 struct dentry *parent = dget_parent(dentry); 250 struct dentry *parent = dget_parent(path->dentry);
238 struct inode *dir = parent->d_inode; 251 struct inode *dir = parent->d_inode;
239 struct nfs_server *server = NFS_SERVER(dir); 252 struct nfs_server *server = NFS_SERVER(dir);
240 struct nfs4_opendata *p; 253 struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
245 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 258 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
246 if (p->o_arg.seqid == NULL) 259 if (p->o_arg.seqid == NULL)
247 goto err_free; 260 goto err_free;
248 atomic_set(&p->count, 1); 261 p->path.mnt = mntget(path->mnt);
249 p->dentry = dget(dentry); 262 p->path.dentry = dget(path->dentry);
250 p->dir = parent; 263 p->dir = parent;
251 p->owner = sp; 264 p->owner = sp;
252 atomic_inc(&sp->so_count); 265 atomic_inc(&sp->so_count);
253 p->o_arg.fh = NFS_FH(dir); 266 p->o_arg.fh = NFS_FH(dir);
254 p->o_arg.open_flags = flags, 267 p->o_arg.open_flags = flags,
255 p->o_arg.clientid = server->nfs_client->cl_clientid; 268 p->o_arg.clientid = server->nfs_client->cl_clientid;
256 p->o_arg.id = sp->so_id; 269 p->o_arg.id = sp->so_owner_id.id;
257 p->o_arg.name = &dentry->d_name; 270 p->o_arg.name = &p->path.dentry->d_name;
258 p->o_arg.server = server; 271 p->o_arg.server = server;
259 p->o_arg.bitmask = server->attr_bitmask; 272 p->o_arg.bitmask = server->attr_bitmask;
260 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 273 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
261 p->o_res.f_attr = &p->f_attr;
262 p->o_res.dir_attr = &p->dir_attr;
263 p->o_res.server = server;
264 nfs_fattr_init(&p->f_attr);
265 nfs_fattr_init(&p->dir_attr);
266 if (flags & O_EXCL) { 274 if (flags & O_EXCL) {
267 u32 *s = (u32 *) p->o_arg.u.verifier.data; 275 u32 *s = (u32 *) p->o_arg.u.verifier.data;
268 s[0] = jiffies; 276 s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
274 p->c_arg.fh = &p->o_res.fh; 282 p->c_arg.fh = &p->o_res.fh;
275 p->c_arg.stateid = &p->o_res.stateid; 283 p->c_arg.stateid = &p->o_res.stateid;
276 p->c_arg.seqid = p->o_arg.seqid; 284 p->c_arg.seqid = p->o_arg.seqid;
285 nfs4_init_opendata_res(p);
286 kref_init(&p->kref);
277 return p; 287 return p;
278err_free: 288err_free:
279 kfree(p); 289 kfree(p);
@@ -282,27 +292,25 @@ err:
282 return NULL; 292 return NULL;
283} 293}
284 294
285static void nfs4_opendata_free(struct nfs4_opendata *p) 295static void nfs4_opendata_free(struct kref *kref)
286{ 296{
287 if (p != NULL && atomic_dec_and_test(&p->count)) { 297 struct nfs4_opendata *p = container_of(kref,
288 nfs_free_seqid(p->o_arg.seqid); 298 struct nfs4_opendata, kref);
289 nfs4_put_state_owner(p->owner); 299
290 dput(p->dir); 300 nfs_free_seqid(p->o_arg.seqid);
291 dput(p->dentry); 301 if (p->state != NULL)
292 kfree(p); 302 nfs4_put_open_state(p->state);
293 } 303 nfs4_put_state_owner(p->owner);
304 dput(p->dir);
305 dput(p->path.dentry);
306 mntput(p->path.mnt);
307 kfree(p);
294} 308}
295 309
296/* Helper for asynchronous RPC calls */ 310static void nfs4_opendata_put(struct nfs4_opendata *p)
297static int nfs4_call_async(struct rpc_clnt *clnt,
298 const struct rpc_call_ops *tk_ops, void *calldata)
299{ 311{
300 struct rpc_task *task; 312 if (p != NULL)
301 313 kref_put(&p->kref, nfs4_opendata_free);
302 if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
303 return -ENOMEM;
304 rpc_execute(task);
305 return 0;
306} 314}
307 315
308static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) 316static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
316 return ret; 324 return ret;
317} 325}
318 326
319static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) 327static int can_open_cached(struct nfs4_state *state, int mode)
328{
329 int ret = 0;
330 switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
331 case FMODE_READ:
332 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
333 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
334 break;
335 case FMODE_WRITE:
336 ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
337 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
338 break;
339 case FMODE_READ|FMODE_WRITE:
340 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
341 }
342 return ret;
343}
344
345static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
346{
347 if ((delegation->type & open_flags) != open_flags)
348 return 0;
349 if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
350 return 0;
351 return 1;
352}
353
354static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
320{ 355{
321 switch (open_flags) { 356 switch (open_flags) {
322 case FMODE_WRITE: 357 case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
328 case FMODE_READ|FMODE_WRITE: 363 case FMODE_READ|FMODE_WRITE:
329 state->n_rdwr++; 364 state->n_rdwr++;
330 } 365 }
366 nfs4_state_set_mode_locked(state, state->state | open_flags);
331} 367}
332 368
333static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 369static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
334{ 370{
335 struct inode *inode = state->inode; 371 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
372 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
373 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
374 switch (open_flags) {
375 case FMODE_READ:
376 set_bit(NFS_O_RDONLY_STATE, &state->flags);
377 break;
378 case FMODE_WRITE:
379 set_bit(NFS_O_WRONLY_STATE, &state->flags);
380 break;
381 case FMODE_READ|FMODE_WRITE:
382 set_bit(NFS_O_RDWR_STATE, &state->flags);
383 }
384}
385
386static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
387{
388 write_seqlock(&state->seqlock);
389 nfs_set_open_stateid_locked(state, stateid, open_flags);
390 write_sequnlock(&state->seqlock);
391}
336 392
393static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
394{
337 open_flags &= (FMODE_READ|FMODE_WRITE); 395 open_flags &= (FMODE_READ|FMODE_WRITE);
338 /* Protect against nfs4_find_state_byowner() */ 396 /*
397 * Protect the call to nfs4_state_set_mode_locked and
398 * serialise the stateid update
399 */
400 write_seqlock(&state->seqlock);
401 if (deleg_stateid != NULL) {
402 memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
403 set_bit(NFS_DELEGATED_STATE, &state->flags);
404 }
405 if (open_stateid != NULL)
406 nfs_set_open_stateid_locked(state, open_stateid, open_flags);
407 write_sequnlock(&state->seqlock);
339 spin_lock(&state->owner->so_lock); 408 spin_lock(&state->owner->so_lock);
340 spin_lock(&inode->i_lock);
341 memcpy(&state->stateid, stateid, sizeof(state->stateid));
342 update_open_stateflags(state, open_flags); 409 update_open_stateflags(state, open_flags);
343 nfs4_state_set_mode_locked(state, state->state | open_flags);
344 spin_unlock(&inode->i_lock);
345 spin_unlock(&state->owner->so_lock); 410 spin_unlock(&state->owner->so_lock);
346} 411}
347 412
413static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
414{
415 struct nfs_delegation *delegation;
416
417 rcu_read_lock();
418 delegation = rcu_dereference(NFS_I(inode)->delegation);
419 if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
420 rcu_read_unlock();
421 return;
422 }
423 rcu_read_unlock();
424 nfs_inode_return_delegation(inode);
425}
426
427static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
428{
429 struct nfs4_state *state = opendata->state;
430 struct nfs_inode *nfsi = NFS_I(state->inode);
431 struct nfs_delegation *delegation;
432 int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
433 nfs4_stateid stateid;
434 int ret = -EAGAIN;
435
436 rcu_read_lock();
437 delegation = rcu_dereference(nfsi->delegation);
438 for (;;) {
439 if (can_open_cached(state, open_mode)) {
440 spin_lock(&state->owner->so_lock);
441 if (can_open_cached(state, open_mode)) {
442 update_open_stateflags(state, open_mode);
443 spin_unlock(&state->owner->so_lock);
444 rcu_read_unlock();
445 goto out_return_state;
446 }
447 spin_unlock(&state->owner->so_lock);
448 }
449 if (delegation == NULL)
450 break;
451 if (!can_open_delegated(delegation, open_mode))
452 break;
453 /* Save the delegation */
454 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
455 rcu_read_unlock();
456 lock_kernel();
457 ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
458 unlock_kernel();
459 if (ret != 0)
460 goto out;
461 ret = -EAGAIN;
462 rcu_read_lock();
463 delegation = rcu_dereference(nfsi->delegation);
464 /* If no delegation, try a cached open */
465 if (delegation == NULL)
466 continue;
467 /* Is the delegation still valid? */
468 if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
469 continue;
470 rcu_read_unlock();
471 update_open_stateid(state, NULL, &stateid, open_mode);
472 goto out_return_state;
473 }
474 rcu_read_unlock();
475out:
476 return ERR_PTR(ret);
477out_return_state:
478 atomic_inc(&state->count);
479 return state;
480}
481
348static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) 482static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
349{ 483{
350 struct inode *inode; 484 struct inode *inode;
351 struct nfs4_state *state = NULL; 485 struct nfs4_state *state = NULL;
486 struct nfs_delegation *delegation;
487 nfs4_stateid *deleg_stateid = NULL;
488 int ret;
352 489
353 if (!(data->f_attr.valid & NFS_ATTR_FATTR)) 490 if (!data->rpc_done) {
491 state = nfs4_try_open_cached(data);
354 goto out; 492 goto out;
493 }
494
495 ret = -EAGAIN;
496 if (!(data->f_attr.valid & NFS_ATTR_FATTR))
497 goto err;
355 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); 498 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
499 ret = PTR_ERR(inode);
356 if (IS_ERR(inode)) 500 if (IS_ERR(inode))
357 goto out; 501 goto err;
502 ret = -ENOMEM;
358 state = nfs4_get_open_state(inode, data->owner); 503 state = nfs4_get_open_state(inode, data->owner);
359 if (state == NULL) 504 if (state == NULL)
360 goto put_inode; 505 goto err_put_inode;
361 update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); 506 if (data->o_res.delegation_type != 0) {
362put_inode: 507 int delegation_flags = 0;
508
509 rcu_read_lock();
510 delegation = rcu_dereference(NFS_I(inode)->delegation);
511 if (delegation)
512 delegation_flags = delegation->flags;
513 rcu_read_unlock();
514 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
515 nfs_inode_set_delegation(state->inode,
516 data->owner->so_cred,
517 &data->o_res);
518 else
519 nfs_inode_reclaim_delegation(state->inode,
520 data->owner->so_cred,
521 &data->o_res);
522 }
523 rcu_read_lock();
524 delegation = rcu_dereference(NFS_I(inode)->delegation);
525 if (delegation != NULL)
526 deleg_stateid = &delegation->stateid;
527 update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
528 rcu_read_unlock();
363 iput(inode); 529 iput(inode);
364out: 530out:
365 return state; 531 return state;
532err_put_inode:
533 iput(inode);
534err:
535 return ERR_PTR(ret);
366} 536}
367 537
368static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) 538static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
382 return ERR_PTR(-ENOENT); 552 return ERR_PTR(-ENOENT);
383} 553}
384 554
385static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) 555static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
386{ 556{
557 struct nfs4_state *newstate;
387 int ret; 558 int ret;
388 559
389 opendata->o_arg.open_flags = openflags; 560 opendata->o_arg.open_flags = openflags;
561 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
562 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
563 nfs4_init_opendata_res(opendata);
390 ret = _nfs4_proc_open(opendata); 564 ret = _nfs4_proc_open(opendata);
391 if (ret != 0) 565 if (ret != 0)
392 return ret; 566 return ret;
393 memcpy(stateid->data, opendata->o_res.stateid.data, 567 newstate = nfs4_opendata_to_nfs4_state(opendata);
394 sizeof(stateid->data)); 568 if (IS_ERR(newstate))
569 return PTR_ERR(newstate);
570 nfs4_close_state(&opendata->path, newstate, openflags);
571 *res = newstate;
395 return 0; 572 return 0;
396} 573}
397 574
398static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) 575static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
399{ 576{
400 nfs4_stateid stateid;
401 struct nfs4_state *newstate; 577 struct nfs4_state *newstate;
402 int mode = 0;
403 int delegation = 0;
404 int ret; 578 int ret;
405 579
406 /* memory barrier prior to reading state->n_* */ 580 /* memory barrier prior to reading state->n_* */
581 clear_bit(NFS_DELEGATED_STATE, &state->flags);
407 smp_rmb(); 582 smp_rmb();
408 if (state->n_rdwr != 0) { 583 if (state->n_rdwr != 0) {
409 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); 584 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
410 if (ret != 0) 585 if (ret != 0)
411 return ret; 586 return ret;
412 mode |= FMODE_READ|FMODE_WRITE; 587 if (newstate != state)
413 if (opendata->o_res.delegation_type != 0) 588 return -ESTALE;
414 delegation = opendata->o_res.delegation_type;
415 smp_rmb();
416 } 589 }
417 if (state->n_wronly != 0) { 590 if (state->n_wronly != 0) {
418 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); 591 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
419 if (ret != 0) 592 if (ret != 0)
420 return ret; 593 return ret;
421 mode |= FMODE_WRITE; 594 if (newstate != state)
422 if (opendata->o_res.delegation_type != 0) 595 return -ESTALE;
423 delegation = opendata->o_res.delegation_type;
424 smp_rmb();
425 } 596 }
426 if (state->n_rdonly != 0) { 597 if (state->n_rdonly != 0) {
427 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); 598 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
428 if (ret != 0) 599 if (ret != 0)
429 return ret; 600 return ret;
430 mode |= FMODE_READ; 601 if (newstate != state)
602 return -ESTALE;
431 } 603 }
432 clear_bit(NFS_DELEGATED_STATE, &state->flags); 604 /*
433 if (mode == 0) 605 * We may have performed cached opens for all three recoveries.
434 return 0; 606 * Check if we need to update the current stateid.
435 if (opendata->o_res.delegation_type == 0) 607 */
436 opendata->o_res.delegation_type = delegation; 608 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
437 opendata->o_arg.open_flags |= mode; 609 memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
438 newstate = nfs4_opendata_to_nfs4_state(opendata); 610 write_seqlock(&state->seqlock);
439 if (newstate != NULL) { 611 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
440 if (opendata->o_res.delegation_type != 0) { 612 memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
441 struct nfs_inode *nfsi = NFS_I(newstate->inode); 613 write_sequnlock(&state->seqlock);
442 int delegation_flags = 0;
443 if (nfsi->delegation)
444 delegation_flags = nfsi->delegation->flags;
445 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
446 nfs_inode_set_delegation(newstate->inode,
447 opendata->owner->so_cred,
448 &opendata->o_res);
449 else
450 nfs_inode_reclaim_delegation(newstate->inode,
451 opendata->owner->so_cred,
452 &opendata->o_res);
453 }
454 nfs4_close_state(newstate, opendata->o_arg.open_flags);
455 } 614 }
456 if (newstate != state)
457 return -ESTALE;
458 return 0; 615 return 0;
459} 616}
460 617
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
462 * OPEN_RECLAIM: 619 * OPEN_RECLAIM:
463 * reclaim state on the server after a reboot. 620 * reclaim state on the server after a reboot.
464 */ 621 */
465static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 622static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
466{ 623{
467 struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; 624 struct nfs_delegation *delegation;
468 struct nfs4_opendata *opendata; 625 struct nfs4_opendata *opendata;
469 int delegation_type = 0; 626 int delegation_type = 0;
470 int status; 627 int status;
471 628
472 if (delegation != NULL) { 629 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
473 if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
474 memcpy(&state->stateid, &delegation->stateid,
475 sizeof(state->stateid));
476 set_bit(NFS_DELEGATED_STATE, &state->flags);
477 return 0;
478 }
479 delegation_type = delegation->type;
480 }
481 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
482 if (opendata == NULL) 630 if (opendata == NULL)
483 return -ENOMEM; 631 return -ENOMEM;
484 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; 632 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
485 opendata->o_arg.fh = NFS_FH(state->inode); 633 opendata->o_arg.fh = NFS_FH(state->inode);
486 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); 634 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
635 rcu_read_lock();
636 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
637 if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
638 delegation_type = delegation->flags;
639 rcu_read_unlock();
487 opendata->o_arg.u.delegation_type = delegation_type; 640 opendata->o_arg.u.delegation_type = delegation_type;
488 status = nfs4_open_recover(opendata, state); 641 status = nfs4_open_recover(opendata, state);
489 nfs4_opendata_free(opendata); 642 nfs4_opendata_put(opendata);
490 return status; 643 return status;
491} 644}
492 645
493static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 646static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
494{ 647{
495 struct nfs_server *server = NFS_SERVER(state->inode); 648 struct nfs_server *server = NFS_SERVER(state->inode);
496 struct nfs4_exception exception = { }; 649 struct nfs4_exception exception = { };
497 int err; 650 int err;
498 do { 651 do {
499 err = _nfs4_do_open_reclaim(sp, state, dentry); 652 err = _nfs4_do_open_reclaim(ctx, state);
500 if (err != -NFS4ERR_DELAY) 653 if (err != -NFS4ERR_DELAY)
501 break; 654 break;
502 nfs4_handle_exception(server, err, &exception); 655 nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
512 ctx = nfs4_state_find_open_context(state); 665 ctx = nfs4_state_find_open_context(state);
513 if (IS_ERR(ctx)) 666 if (IS_ERR(ctx))
514 return PTR_ERR(ctx); 667 return PTR_ERR(ctx);
515 ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); 668 ret = nfs4_do_open_reclaim(ctx, state);
516 put_nfs_open_context(ctx); 669 put_nfs_open_context(ctx);
517 return ret; 670 return ret;
518} 671}
519 672
520static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 673static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
521{ 674{
522 struct nfs4_state_owner *sp = state->owner; 675 struct nfs4_state_owner *sp = state->owner;
523 struct nfs4_opendata *opendata; 676 struct nfs4_opendata *opendata;
524 int ret; 677 int ret;
525 678
526 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 679 opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
527 return 0;
528 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
529 if (opendata == NULL) 680 if (opendata == NULL)
530 return -ENOMEM; 681 return -ENOMEM;
531 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; 682 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
532 memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, 683 memcpy(opendata->o_arg.u.delegation.data, stateid->data,
533 sizeof(opendata->o_arg.u.delegation.data)); 684 sizeof(opendata->o_arg.u.delegation.data));
534 ret = nfs4_open_recover(opendata, state); 685 ret = nfs4_open_recover(opendata, state);
535 nfs4_opendata_free(opendata); 686 nfs4_opendata_put(opendata);
536 return ret; 687 return ret;
537} 688}
538 689
539int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 690int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
540{ 691{
541 struct nfs4_exception exception = { }; 692 struct nfs4_exception exception = { };
542 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 693 struct nfs_server *server = NFS_SERVER(state->inode);
543 int err; 694 int err;
544 do { 695 do {
545 err = _nfs4_open_delegation_recall(dentry, state); 696 err = _nfs4_open_delegation_recall(ctx, state, stateid);
546 switch (err) { 697 switch (err) {
547 case 0: 698 case 0:
548 return err; 699 return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
582 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 733 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
583 sizeof(data->o_res.stateid.data)); 734 sizeof(data->o_res.stateid.data));
584 renew_lease(data->o_res.server, data->timestamp); 735 renew_lease(data->o_res.server, data->timestamp);
736 data->rpc_done = 1;
585 } 737 }
586 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
587 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); 738 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
739 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
588} 740}
589 741
590static void nfs4_open_confirm_release(void *calldata) 742static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
596 if (data->cancelled == 0) 748 if (data->cancelled == 0)
597 goto out_free; 749 goto out_free;
598 /* In case of error, no cleanup! */ 750 /* In case of error, no cleanup! */
599 if (data->rpc_status != 0) 751 if (!data->rpc_done)
600 goto out_free; 752 goto out_free;
601 nfs_confirm_seqid(&data->owner->so_seqid, 0); 753 nfs_confirm_seqid(&data->owner->so_seqid, 0);
602 state = nfs4_opendata_to_nfs4_state(data); 754 state = nfs4_opendata_to_nfs4_state(data);
603 if (state != NULL) 755 if (!IS_ERR(state))
604 nfs4_close_state(state, data->o_arg.open_flags); 756 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
605out_free: 757out_free:
606 nfs4_opendata_free(data); 758 nfs4_opendata_put(data);
607} 759}
608 760
609static const struct rpc_call_ops nfs4_open_confirm_ops = { 761static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
621 struct rpc_task *task; 773 struct rpc_task *task;
622 int status; 774 int status;
623 775
624 atomic_inc(&data->count); 776 kref_get(&data->kref);
625 /* 777 data->rpc_done = 0;
626 * If rpc_run_task() ends up calling ->rpc_release(), we 778 data->rpc_status = 0;
627 * want to ensure that it takes the 'error' code path.
628 */
629 data->rpc_status = -ENOMEM;
630 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); 779 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
631 if (IS_ERR(task)) 780 if (IS_ERR(task))
632 return PTR_ERR(task); 781 return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
653 802
654 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 803 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
655 return; 804 return;
805 /*
806 * Check if we still need to send an OPEN call, or if we can use
807 * a delegation instead.
808 */
809 if (data->state != NULL) {
810 struct nfs_delegation *delegation;
811
812 if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
813 goto out_no_action;
814 rcu_read_lock();
815 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
816 if (delegation != NULL &&
817 (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
818 rcu_read_unlock();
819 goto out_no_action;
820 }
821 rcu_read_unlock();
822 }
656 /* Update sequence id. */ 823 /* Update sequence id. */
657 data->o_arg.id = sp->so_id; 824 data->o_arg.id = sp->so_owner_id.id;
658 data->o_arg.clientid = sp->so_client->cl_clientid; 825 data->o_arg.clientid = sp->so_client->cl_clientid;
659 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) 826 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
660 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 827 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
661 data->timestamp = jiffies; 828 data->timestamp = jiffies;
662 rpc_call_setup(task, &msg, 0); 829 rpc_call_setup(task, &msg, 0);
830 return;
831out_no_action:
832 task->tk_action = NULL;
833
663} 834}
664 835
665static void nfs4_open_done(struct rpc_task *task, void *calldata) 836static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
683 data->rpc_status = -ENOTDIR; 854 data->rpc_status = -ENOTDIR;
684 } 855 }
685 renew_lease(data->o_res.server, data->timestamp); 856 renew_lease(data->o_res.server, data->timestamp);
857 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
858 nfs_confirm_seqid(&data->owner->so_seqid, 0);
686 } 859 }
687 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); 860 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
861 data->rpc_done = 1;
688} 862}
689 863
690static void nfs4_open_release(void *calldata) 864static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
696 if (data->cancelled == 0) 870 if (data->cancelled == 0)
697 goto out_free; 871 goto out_free;
698 /* In case of error, no cleanup! */ 872 /* In case of error, no cleanup! */
699 if (data->rpc_status != 0) 873 if (data->rpc_status != 0 || !data->rpc_done)
700 goto out_free; 874 goto out_free;
701 /* In case we need an open_confirm, no cleanup! */ 875 /* In case we need an open_confirm, no cleanup! */
702 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) 876 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
703 goto out_free; 877 goto out_free;
704 nfs_confirm_seqid(&data->owner->so_seqid, 0); 878 nfs_confirm_seqid(&data->owner->so_seqid, 0);
705 state = nfs4_opendata_to_nfs4_state(data); 879 state = nfs4_opendata_to_nfs4_state(data);
706 if (state != NULL) 880 if (!IS_ERR(state))
707 nfs4_close_state(state, data->o_arg.open_flags); 881 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
708out_free: 882out_free:
709 nfs4_opendata_free(data); 883 nfs4_opendata_put(data);
710} 884}
711 885
712static const struct rpc_call_ops nfs4_open_ops = { 886static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
727 struct rpc_task *task; 901 struct rpc_task *task;
728 int status; 902 int status;
729 903
730 atomic_inc(&data->count); 904 kref_get(&data->kref);
731 /* 905 data->rpc_done = 0;
732 * If rpc_run_task() ends up calling ->rpc_release(), we 906 data->rpc_status = 0;
733 * want to ensure that it takes the 'error' code path. 907 data->cancelled = 0;
734 */
735 data->rpc_status = -ENOMEM;
736 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); 908 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
737 if (IS_ERR(task)) 909 if (IS_ERR(task))
738 return PTR_ERR(task); 910 return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
743 } else 915 } else
744 status = data->rpc_status; 916 status = data->rpc_status;
745 rpc_put_task(task); 917 rpc_put_task(task);
746 if (status != 0) 918 if (status != 0 || !data->rpc_done)
747 return status; 919 return status;
748 920
749 if (o_arg->open_flags & O_CREAT) { 921 if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
756 if (status != 0) 928 if (status != 0)
757 return status; 929 return status;
758 } 930 }
759 nfs_confirm_seqid(&data->owner->so_seqid, 0);
760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 931 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
761 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 932 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
762 return 0; 933 return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
772 mask |= MAY_READ; 943 mask |= MAY_READ;
773 if (openflags & FMODE_WRITE) 944 if (openflags & FMODE_WRITE)
774 mask |= MAY_WRITE; 945 mask |= MAY_WRITE;
946 if (openflags & FMODE_EXEC)
947 mask |= MAY_EXEC;
775 status = nfs_access_get_cached(inode, cred, &cache); 948 status = nfs_access_get_cached(inode, cred, &cache);
776 if (status == 0) 949 if (status == 0)
777 goto out; 950 goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
811 * reclaim state on the server after a network partition. 984 * reclaim state on the server after a network partition.
812 * Assumes caller holds the appropriate lock 985 * Assumes caller holds the appropriate lock
813 */ 986 */
814static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 987static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
815{ 988{
816 struct inode *inode = state->inode;
817 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
818 struct nfs4_opendata *opendata; 989 struct nfs4_opendata *opendata;
819 int openflags = state->state & (FMODE_READ|FMODE_WRITE);
820 int ret; 990 int ret;
821 991
822 if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { 992 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
823 ret = _nfs4_do_access(inode, sp->so_cred, openflags);
824 if (ret < 0)
825 return ret;
826 memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
827 set_bit(NFS_DELEGATED_STATE, &state->flags);
828 return 0;
829 }
830 opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
831 if (opendata == NULL) 993 if (opendata == NULL)
832 return -ENOMEM; 994 return -ENOMEM;
833 ret = nfs4_open_recover(opendata, state); 995 ret = nfs4_open_recover(opendata, state);
834 if (ret == -ESTALE) { 996 if (ret == -ESTALE) {
835 /* Invalidate the state owner so we don't ever use it again */ 997 /* Invalidate the state owner so we don't ever use it again */
836 nfs4_drop_state_owner(sp); 998 nfs4_drop_state_owner(state->owner);
837 d_drop(dentry); 999 d_drop(ctx->path.dentry);
838 } 1000 }
839 nfs4_opendata_free(opendata); 1001 nfs4_opendata_put(opendata);
840 return ret; 1002 return ret;
841} 1003}
842 1004
843static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 1005static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
844{ 1006{
845 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 1007 struct nfs_server *server = NFS_SERVER(state->inode);
846 struct nfs4_exception exception = { }; 1008 struct nfs4_exception exception = { };
847 int err; 1009 int err;
848 1010
849 do { 1011 do {
850 err = _nfs4_open_expired(sp, state, dentry); 1012 err = _nfs4_open_expired(ctx, state);
851 if (err == -NFS4ERR_DELAY) 1013 if (err == -NFS4ERR_DELAY)
852 nfs4_handle_exception(server, err, &exception); 1014 nfs4_handle_exception(server, err, &exception);
853 } while (exception.retry); 1015 } while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
862 ctx = nfs4_state_find_open_context(state); 1024 ctx = nfs4_state_find_open_context(state);
863 if (IS_ERR(ctx)) 1025 if (IS_ERR(ctx))
864 return PTR_ERR(ctx); 1026 return PTR_ERR(ctx);
865 ret = nfs4_do_open_expired(sp, state, ctx->dentry); 1027 ret = nfs4_do_open_expired(ctx, state);
866 put_nfs_open_context(ctx); 1028 put_nfs_open_context(ctx);
867 return ret; 1029 return ret;
868} 1030}
869 1031
870/* 1032/*
871 * Returns a referenced nfs4_state if there is an open delegation on the file 1033 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
1034 * fields corresponding to attributes that were used to store the verifier.
1035 * Make sure we clobber those fields in the later setattr call
872 */ 1036 */
873static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) 1037static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
874{
875 struct nfs_delegation *delegation;
876 struct nfs_server *server = NFS_SERVER(inode);
877 struct nfs_client *clp = server->nfs_client;
878 struct nfs_inode *nfsi = NFS_I(inode);
879 struct nfs4_state_owner *sp = NULL;
880 struct nfs4_state *state = NULL;
881 int open_flags = flags & (FMODE_READ|FMODE_WRITE);
882 int err;
883
884 err = -ENOMEM;
885 if (!(sp = nfs4_get_state_owner(server, cred))) {
886 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
887 return err;
888 }
889 err = nfs4_recover_expired_lease(server);
890 if (err != 0)
891 goto out_put_state_owner;
892 /* Protect against reboot recovery - NOTE ORDER! */
893 down_read(&clp->cl_sem);
894 /* Protect against delegation recall */
895 down_read(&nfsi->rwsem);
896 delegation = NFS_I(inode)->delegation;
897 err = -ENOENT;
898 if (delegation == NULL || (delegation->type & open_flags) != open_flags)
899 goto out_err;
900 err = -ENOMEM;
901 state = nfs4_get_open_state(inode, sp);
902 if (state == NULL)
903 goto out_err;
904
905 err = -ENOENT;
906 if ((state->state & open_flags) == open_flags) {
907 spin_lock(&inode->i_lock);
908 update_open_stateflags(state, open_flags);
909 spin_unlock(&inode->i_lock);
910 goto out_ok;
911 } else if (state->state != 0)
912 goto out_put_open_state;
913
914 lock_kernel();
915 err = _nfs4_do_access(inode, cred, open_flags);
916 unlock_kernel();
917 if (err != 0)
918 goto out_put_open_state;
919 set_bit(NFS_DELEGATED_STATE, &state->flags);
920 update_open_stateid(state, &delegation->stateid, open_flags);
921out_ok:
922 nfs4_put_state_owner(sp);
923 up_read(&nfsi->rwsem);
924 up_read(&clp->cl_sem);
925 *res = state;
926 return 0;
927out_put_open_state:
928 nfs4_put_open_state(state);
929out_err:
930 up_read(&nfsi->rwsem);
931 up_read(&clp->cl_sem);
932 if (err != -EACCES)
933 nfs_inode_return_delegation(inode);
934out_put_state_owner:
935 nfs4_put_state_owner(sp);
936 return err;
937}
938
939static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
940{ 1038{
941 struct nfs4_exception exception = { }; 1039 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
942 struct nfs4_state *res = ERR_PTR(-EIO); 1040 !(sattr->ia_valid & ATTR_ATIME_SET))
943 int err; 1041 sattr->ia_valid |= ATTR_ATIME;
944 1042
945 do { 1043 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
946 err = _nfs4_open_delegated(inode, flags, cred, &res); 1044 !(sattr->ia_valid & ATTR_MTIME_SET))
947 if (err == 0) 1045 sattr->ia_valid |= ATTR_MTIME;
948 break;
949 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
950 err, &exception));
951 } while (exception.retry);
952 return res;
953} 1046}
954 1047
955/* 1048/*
956 * Returns a referenced nfs4_state 1049 * Returns a referenced nfs4_state
957 */ 1050 */
958static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1051static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
959{ 1052{
960 struct nfs4_state_owner *sp; 1053 struct nfs4_state_owner *sp;
961 struct nfs4_state *state = NULL; 1054 struct nfs4_state *state = NULL;
962 struct nfs_server *server = NFS_SERVER(dir); 1055 struct nfs_server *server = NFS_SERVER(dir);
963 struct nfs_client *clp = server->nfs_client; 1056 struct nfs_client *clp = server->nfs_client;
964 struct nfs4_opendata *opendata; 1057 struct nfs4_opendata *opendata;
965 int status; 1058 int status;
966 1059
967 /* Protect against reboot recovery conflicts */ 1060 /* Protect against reboot recovery conflicts */
968 status = -ENOMEM; 1061 status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
973 status = nfs4_recover_expired_lease(server); 1066 status = nfs4_recover_expired_lease(server);
974 if (status != 0) 1067 if (status != 0)
975 goto err_put_state_owner; 1068 goto err_put_state_owner;
1069 if (path->dentry->d_inode != NULL)
1070 nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
976 down_read(&clp->cl_sem); 1071 down_read(&clp->cl_sem);
977 status = -ENOMEM; 1072 status = -ENOMEM;
978 opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); 1073 opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
979 if (opendata == NULL) 1074 if (opendata == NULL)
980 goto err_release_rwsem; 1075 goto err_release_rwsem;
981 1076
1077 if (path->dentry->d_inode != NULL)
1078 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
1079
982 status = _nfs4_proc_open(opendata); 1080 status = _nfs4_proc_open(opendata);
983 if (status != 0) 1081 if (status != 0)
984 goto err_opendata_free; 1082 goto err_opendata_put;
1083
1084 if (opendata->o_arg.open_flags & O_EXCL)
1085 nfs4_exclusive_attrset(opendata, sattr);
985 1086
986 status = -ENOMEM;
987 state = nfs4_opendata_to_nfs4_state(opendata); 1087 state = nfs4_opendata_to_nfs4_state(opendata);
988 if (state == NULL) 1088 status = PTR_ERR(state);
989 goto err_opendata_free; 1089 if (IS_ERR(state))
990 if (opendata->o_res.delegation_type != 0) 1090 goto err_opendata_put;
991 nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); 1091 nfs4_opendata_put(opendata);
992 nfs4_opendata_free(opendata);
993 nfs4_put_state_owner(sp); 1092 nfs4_put_state_owner(sp);
994 up_read(&clp->cl_sem); 1093 up_read(&clp->cl_sem);
995 *res = state; 1094 *res = state;
996 return 0; 1095 return 0;
997err_opendata_free: 1096err_opendata_put:
998 nfs4_opendata_free(opendata); 1097 nfs4_opendata_put(opendata);
999err_release_rwsem: 1098err_release_rwsem:
1000 up_read(&clp->cl_sem); 1099 up_read(&clp->cl_sem);
1001err_put_state_owner: 1100err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
1006} 1105}
1007 1106
1008 1107
1009static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) 1108static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
1010{ 1109{
1011 struct nfs4_exception exception = { }; 1110 struct nfs4_exception exception = { };
1012 struct nfs4_state *res; 1111 struct nfs4_state *res;
1013 int status; 1112 int status;
1014 1113
1015 do { 1114 do {
1016 status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); 1115 status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
1017 if (status == 0) 1116 if (status == 0)
1018 break; 1117 break;
1019 /* NOTE: BAD_SEQID means the server and client disagree about the 1118 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1028 * the user though... 1127 * the user though...
1029 */ 1128 */
1030 if (status == -NFS4ERR_BAD_SEQID) { 1129 if (status == -NFS4ERR_BAD_SEQID) {
1031 printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); 1130 printk(KERN_WARNING "NFS: v4 server %s "
1131 " returned a bad sequence-id error!\n",
1132 NFS_SERVER(dir)->nfs_client->cl_hostname);
1032 exception.retry = 1; 1133 exception.retry = 1;
1033 continue; 1134 continue;
1034 } 1135 }
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1042 exception.retry = 1; 1143 exception.retry = 1;
1043 continue; 1144 continue;
1044 } 1145 }
1146 if (status == -EAGAIN) {
1147 /* We must have found a delegation */
1148 exception.retry = 1;
1149 continue;
1150 }
1045 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), 1151 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
1046 status, &exception)); 1152 status, &exception));
1047 } while (exception.retry); 1153 } while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
1101} 1207}
1102 1208
1103struct nfs4_closedata { 1209struct nfs4_closedata {
1210 struct path path;
1104 struct inode *inode; 1211 struct inode *inode;
1105 struct nfs4_state *state; 1212 struct nfs4_state *state;
1106 struct nfs_closeargs arg; 1213 struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
1117 nfs4_put_open_state(calldata->state); 1224 nfs4_put_open_state(calldata->state);
1118 nfs_free_seqid(calldata->arg.seqid); 1225 nfs_free_seqid(calldata->arg.seqid);
1119 nfs4_put_state_owner(sp); 1226 nfs4_put_state_owner(sp);
1227 dput(calldata->path.dentry);
1228 mntput(calldata->path.mnt);
1120 kfree(calldata); 1229 kfree(calldata);
1121} 1230}
1122 1231
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1134 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); 1243 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
1135 switch (task->tk_status) { 1244 switch (task->tk_status) {
1136 case 0: 1245 case 0:
1137 memcpy(&state->stateid, &calldata->res.stateid, 1246 nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
1138 sizeof(state->stateid));
1139 renew_lease(server, calldata->timestamp); 1247 renew_lease(server, calldata->timestamp);
1140 break; 1248 break;
1141 case -NFS4ERR_STALE_STATEID: 1249 case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1160 .rpc_resp = &calldata->res, 1268 .rpc_resp = &calldata->res,
1161 .rpc_cred = state->owner->so_cred, 1269 .rpc_cred = state->owner->so_cred,
1162 }; 1270 };
1163 int mode = 0, old_mode; 1271 int clear_rd, clear_wr, clear_rdwr;
1272 int mode;
1164 1273
1165 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 1274 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1166 return; 1275 return;
1167 /* Recalculate the new open mode in case someone reopened the file 1276
1168 * while we were waiting in line to be scheduled. 1277 mode = FMODE_READ|FMODE_WRITE;
1169 */ 1278 clear_rd = clear_wr = clear_rdwr = 0;
1170 spin_lock(&state->owner->so_lock); 1279 spin_lock(&state->owner->so_lock);
1171 spin_lock(&calldata->inode->i_lock); 1280 /* Calculate the change in open mode */
1172 mode = old_mode = state->state;
1173 if (state->n_rdwr == 0) { 1281 if (state->n_rdwr == 0) {
1174 if (state->n_rdonly == 0) 1282 if (state->n_rdonly == 0) {
1175 mode &= ~FMODE_READ; 1283 mode &= ~FMODE_READ;
1176 if (state->n_wronly == 0) 1284 clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1285 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1286 }
1287 if (state->n_wronly == 0) {
1177 mode &= ~FMODE_WRITE; 1288 mode &= ~FMODE_WRITE;
1289 clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1290 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1291 }
1178 } 1292 }
1179 nfs4_state_set_mode_locked(state, mode);
1180 spin_unlock(&calldata->inode->i_lock);
1181 spin_unlock(&state->owner->so_lock); 1293 spin_unlock(&state->owner->so_lock);
1182 if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { 1294 if (!clear_rd && !clear_wr && !clear_rdwr) {
1183 /* Note: exit _without_ calling nfs4_close_done */ 1295 /* Note: exit _without_ calling nfs4_close_done */
1184 task->tk_action = NULL; 1296 task->tk_action = NULL;
1185 return; 1297 return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
1209 * 1321 *
1210 * NOTE: Caller must be holding the sp->so_owner semaphore! 1322 * NOTE: Caller must be holding the sp->so_owner semaphore!
1211 */ 1323 */
1212int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 1324int nfs4_do_close(struct path *path, struct nfs4_state *state)
1213{ 1325{
1214 struct nfs_server *server = NFS_SERVER(inode); 1326 struct nfs_server *server = NFS_SERVER(state->inode);
1215 struct nfs4_closedata *calldata; 1327 struct nfs4_closedata *calldata;
1328 struct nfs4_state_owner *sp = state->owner;
1329 struct rpc_task *task;
1216 int status = -ENOMEM; 1330 int status = -ENOMEM;
1217 1331
1218 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); 1332 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
1219 if (calldata == NULL) 1333 if (calldata == NULL)
1220 goto out; 1334 goto out;
1221 calldata->inode = inode; 1335 calldata->inode = state->inode;
1222 calldata->state = state; 1336 calldata->state = state;
1223 calldata->arg.fh = NFS_FH(inode); 1337 calldata->arg.fh = NFS_FH(state->inode);
1224 calldata->arg.stateid = &state->stateid; 1338 calldata->arg.stateid = &state->open_stateid;
1225 /* Serialization for the sequence id */ 1339 /* Serialization for the sequence id */
1226 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1340 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1227 if (calldata->arg.seqid == NULL) 1341 if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
1229 calldata->arg.bitmask = server->attr_bitmask; 1343 calldata->arg.bitmask = server->attr_bitmask;
1230 calldata->res.fattr = &calldata->fattr; 1344 calldata->res.fattr = &calldata->fattr;
1231 calldata->res.server = server; 1345 calldata->res.server = server;
1346 calldata->path.mnt = mntget(path->mnt);
1347 calldata->path.dentry = dget(path->dentry);
1232 1348
1233 status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); 1349 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
1234 if (status == 0) 1350 if (IS_ERR(task))
1235 goto out; 1351 return PTR_ERR(task);
1236 1352 rpc_put_task(task);
1237 nfs_free_seqid(calldata->arg.seqid); 1353 return 0;
1238out_free_calldata: 1354out_free_calldata:
1239 kfree(calldata); 1355 kfree(calldata);
1240out: 1356out:
1357 nfs4_put_open_state(state);
1358 nfs4_put_state_owner(sp);
1241 return status; 1359 return status;
1242} 1360}
1243 1361
1244static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) 1362static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
1245{ 1363{
1246 struct file *filp; 1364 struct file *filp;
1365 int ret;
1247 1366
1248 filp = lookup_instantiate_filp(nd, dentry, NULL); 1367 /* If the open_intent is for execute, we have an extra check to make */
1368 if (nd->intent.open.flags & FMODE_EXEC) {
1369 ret = _nfs4_do_access(state->inode,
1370 state->owner->so_cred,
1371 nd->intent.open.flags);
1372 if (ret < 0)
1373 goto out_close;
1374 }
1375 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
1249 if (!IS_ERR(filp)) { 1376 if (!IS_ERR(filp)) {
1250 struct nfs_open_context *ctx; 1377 struct nfs_open_context *ctx;
1251 ctx = (struct nfs_open_context *)filp->private_data; 1378 ctx = (struct nfs_open_context *)filp->private_data;
1252 ctx->state = state; 1379 ctx->state = state;
1253 return 0; 1380 return 0;
1254 } 1381 }
1255 nfs4_close_state(state, nd->intent.open.flags); 1382 ret = PTR_ERR(filp);
1256 return PTR_ERR(filp); 1383out_close:
1384 nfs4_close_state(path, state, nd->intent.open.flags);
1385 return ret;
1257} 1386}
1258 1387
1259struct dentry * 1388struct dentry *
1260nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1389nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1261{ 1390{
1391 struct path path = {
1392 .mnt = nd->mnt,
1393 .dentry = dentry,
1394 };
1262 struct iattr attr; 1395 struct iattr attr;
1263 struct rpc_cred *cred; 1396 struct rpc_cred *cred;
1264 struct nfs4_state *state; 1397 struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1277 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1410 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1278 if (IS_ERR(cred)) 1411 if (IS_ERR(cred))
1279 return (struct dentry *)cred; 1412 return (struct dentry *)cred;
1280 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1413 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
1281 put_rpccred(cred); 1414 put_rpccred(cred);
1282 if (IS_ERR(state)) { 1415 if (IS_ERR(state)) {
1283 if (PTR_ERR(state) == -ENOENT) 1416 if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1287 res = d_add_unique(dentry, igrab(state->inode)); 1420 res = d_add_unique(dentry, igrab(state->inode));
1288 if (res != NULL) 1421 if (res != NULL)
1289 dentry = res; 1422 dentry = res;
1290 nfs4_intent_set_file(nd, dentry, state); 1423 nfs4_intent_set_file(nd, &path, state);
1291 return res; 1424 return res;
1292} 1425}
1293 1426
1294int 1427int
1295nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) 1428nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
1296{ 1429{
1430 struct path path = {
1431 .mnt = nd->mnt,
1432 .dentry = dentry,
1433 };
1297 struct rpc_cred *cred; 1434 struct rpc_cred *cred;
1298 struct nfs4_state *state; 1435 struct nfs4_state *state;
1299 1436
1300 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1437 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1301 if (IS_ERR(cred)) 1438 if (IS_ERR(cred))
1302 return PTR_ERR(cred); 1439 return PTR_ERR(cred);
1303 state = nfs4_open_delegated(dentry->d_inode, openflags, cred); 1440 state = nfs4_do_open(dir, &path, openflags, NULL, cred);
1304 if (IS_ERR(state))
1305 state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
1306 put_rpccred(cred); 1441 put_rpccred(cred);
1307 if (IS_ERR(state)) { 1442 if (IS_ERR(state)) {
1308 switch (PTR_ERR(state)) { 1443 switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1318 } 1453 }
1319 } 1454 }
1320 if (state->inode == dentry->d_inode) { 1455 if (state->inode == dentry->d_inode) {
1321 nfs4_intent_set_file(nd, dentry, state); 1456 nfs4_intent_set_file(nd, &path, state);
1322 return 1; 1457 return 1;
1323 } 1458 }
1324 nfs4_close_state(state, openflags); 1459 nfs4_close_state(&path, state, openflags);
1325out_drop: 1460out_drop:
1326 d_drop(dentry); 1461 d_drop(dentry);
1327 return 0; 1462 return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1559 dprintk("NFS call lookupfh %s\n", name->name); 1694 dprintk("NFS call lookupfh %s\n", name->name);
1560 status = rpc_call_sync(server->client, &msg, 0); 1695 status = rpc_call_sync(server->client, &msg, 0);
1561 dprintk("NFS reply lookupfh: %d\n", status); 1696 dprintk("NFS reply lookupfh: %d\n", status);
1562 if (status == -NFS4ERR_MOVED)
1563 status = -EREMOTE;
1564 return status; 1697 return status;
1565} 1698}
1566 1699
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1571 struct nfs4_exception exception = { }; 1704 struct nfs4_exception exception = { };
1572 int err; 1705 int err;
1573 do { 1706 do {
1574 err = nfs4_handle_exception(server, 1707 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
1575 _nfs4_proc_lookupfh(server, dirfh, name, 1708 /* FIXME: !!!! */
1576 fhandle, fattr), 1709 if (err == -NFS4ERR_MOVED) {
1577 &exception); 1710 err = -EREMOTE;
1711 break;
1712 }
1713 err = nfs4_handle_exception(server, err, &exception);
1578 } while (exception.retry); 1714 } while (exception.retry);
1579 return err; 1715 return err;
1580} 1716}
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1582static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, 1718static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1583 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1719 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1584{ 1720{
1585 int status; 1721 int status;
1586 struct nfs_server *server = NFS_SERVER(dir);
1587 struct nfs4_lookup_arg args = {
1588 .bitmask = server->attr_bitmask,
1589 .dir_fh = NFS_FH(dir),
1590 .name = name,
1591 };
1592 struct nfs4_lookup_res res = {
1593 .server = server,
1594 .fattr = fattr,
1595 .fh = fhandle,
1596 };
1597 struct rpc_message msg = {
1598 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1599 .rpc_argp = &args,
1600 .rpc_resp = &res,
1601 };
1602
1603 nfs_fattr_init(fattr);
1604 1722
1605 dprintk("NFS call lookup %s\n", name->name); 1723 dprintk("NFS call lookup %s\n", name->name);
1606 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1724 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
1607 if (status == -NFS4ERR_MOVED) 1725 if (status == -NFS4ERR_MOVED)
1608 status = nfs4_get_referral(dir, name, fattr, fhandle); 1726 status = nfs4_get_referral(dir, name, fattr, fhandle);
1609 dprintk("NFS reply lookup: %d\n", status); 1727 dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
1752nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 1870nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1753 int flags, struct nameidata *nd) 1871 int flags, struct nameidata *nd)
1754{ 1872{
1873 struct path path = {
1874 .mnt = nd->mnt,
1875 .dentry = dentry,
1876 };
1755 struct nfs4_state *state; 1877 struct nfs4_state *state;
1756 struct rpc_cred *cred; 1878 struct rpc_cred *cred;
1757 int status = 0; 1879 int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1761 status = PTR_ERR(cred); 1883 status = PTR_ERR(cred);
1762 goto out; 1884 goto out;
1763 } 1885 }
1764 state = nfs4_do_open(dir, dentry, flags, sattr, cred); 1886 state = nfs4_do_open(dir, &path, flags, sattr, cred);
1765 put_rpccred(cred); 1887 put_rpccred(cred);
1766 if (IS_ERR(state)) { 1888 if (IS_ERR(state)) {
1767 status = PTR_ERR(state); 1889 status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1773 status = nfs4_do_setattr(state->inode, &fattr, sattr, state); 1895 status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
1774 if (status == 0) 1896 if (status == 0)
1775 nfs_setattr_update_inode(state->inode, sattr); 1897 nfs_setattr_update_inode(state->inode, sattr);
1898 nfs_post_op_update_inode(state->inode, &fattr);
1776 } 1899 }
1777 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) 1900 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
1778 status = nfs4_intent_set_file(nd, dentry, state); 1901 status = nfs4_intent_set_file(nd, &path, state);
1779 else 1902 else
1780 nfs4_close_state(state, flags); 1903 nfs4_close_state(&path, state, flags);
1781out: 1904out:
1782 return status; 1905 return status;
1783} 1906}
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3008 if (status != 0) 3131 if (status != 0)
3009 goto out; 3132 goto out;
3010 lsp = request->fl_u.nfs4_fl.owner; 3133 lsp = request->fl_u.nfs4_fl.owner;
3011 arg.lock_owner.id = lsp->ls_id; 3134 arg.lock_owner.id = lsp->ls_id.id;
3012 status = rpc_call_sync(server->client, &msg, 0); 3135 status = rpc_call_sync(server->client, &msg, 0);
3013 switch (status) { 3136 switch (status) {
3014 case 0: 3137 case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3152{ 3275{
3153 struct nfs4_unlockdata *data; 3276 struct nfs4_unlockdata *data;
3154 3277
3278 /* Ensure this is an unlock - when canceling a lock, the
3279 * canceled lock is passed in, and it won't be an unlock.
3280 */
3281 fl->fl_type = F_UNLCK;
3282
3155 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); 3283 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
3156 if (data == NULL) { 3284 if (data == NULL) {
3157 nfs_free_seqid(seqid); 3285 nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3222 goto out_free; 3350 goto out_free;
3223 p->arg.lock_stateid = &lsp->ls_stateid; 3351 p->arg.lock_stateid = &lsp->ls_stateid;
3224 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3352 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3225 p->arg.lock_owner.id = lsp->ls_id; 3353 p->arg.lock_owner.id = lsp->ls_id.id;
3226 p->lsp = lsp; 3354 p->lsp = lsp;
3227 atomic_inc(&lsp->ls_count); 3355 atomic_inc(&lsp->ls_count);
3228 p->ctx = get_nfs_open_context(ctx); 3356 p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3285 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 3413 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
3286 sizeof(data->lsp->ls_stateid.data)); 3414 sizeof(data->lsp->ls_stateid.data));
3287 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 3415 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
3288 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 3416 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
3289 } 3417 }
3290 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); 3418 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
3291out: 3419out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f9..e9662ba81d86 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
38 * subsequent patch. 38 * subsequent patch.
39 */ 39 */
40 40
41#include <linux/kernel.h>
41#include <linux/slab.h> 42#include <linux/slab.h>
42#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
43#include <linux/nfs_fs.h> 44#include <linux/nfs_fs.h>
44#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
45#include <linux/kthread.h> 46#include <linux/kthread.h>
46#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h>
47#include <linux/workqueue.h> 49#include <linux/workqueue.h>
48#include <linux/bitops.h> 50#include <linux/bitops.h>
49 51
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
69 return status; 71 return status;
70} 72}
71 73
72u32
73nfs4_alloc_lockowner_id(struct nfs_client *clp)
74{
75 return clp->cl_lockowner_id ++;
76}
77
78static struct nfs4_state_owner *
79nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
80{
81 struct nfs4_state_owner *sp = NULL;
82
83 if (!list_empty(&clp->cl_unused)) {
84 sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
85 atomic_inc(&sp->so_count);
86 sp->so_cred = cred;
87 list_move(&sp->so_list, &clp->cl_state_owners);
88 clp->cl_nunused--;
89 }
90 return sp;
91}
92
93struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 74struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
94{ 75{
95 struct nfs4_state_owner *sp; 76 struct nfs4_state_owner *sp;
77 struct rb_node *pos;
96 struct rpc_cred *cred = NULL; 78 struct rpc_cred *cred = NULL;
97 79
98 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 80 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
81 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
99 if (list_empty(&sp->so_states)) 82 if (list_empty(&sp->so_states))
100 continue; 83 continue;
101 cred = get_rpccred(sp->so_cred); 84 cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
107static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 90static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
108{ 91{
109 struct nfs4_state_owner *sp; 92 struct nfs4_state_owner *sp;
93 struct rb_node *pos;
110 94
111 if (!list_empty(&clp->cl_state_owners)) { 95 pos = rb_first(&clp->cl_state_owners);
112 sp = list_entry(clp->cl_state_owners.next, 96 if (pos != NULL) {
113 struct nfs4_state_owner, so_list); 97 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
114 return get_rpccred(sp->so_cred); 98 return get_rpccred(sp->so_cred);
115 } 99 }
116 return NULL; 100 return NULL;
117} 101}
118 102
103static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
104 __u64 minval, int maxbits)
105{
106 struct rb_node **p, *parent;
107 struct nfs_unique_id *pos;
108 __u64 mask = ~0ULL;
109
110 if (maxbits < 64)
111 mask = (1ULL << maxbits) - 1ULL;
112
113 /* Ensure distribution is more or less flat */
114 get_random_bytes(&new->id, sizeof(new->id));
115 new->id &= mask;
116 if (new->id < minval)
117 new->id += minval;
118retry:
119 p = &root->rb_node;
120 parent = NULL;
121
122 while (*p != NULL) {
123 parent = *p;
124 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
125
126 if (new->id < pos->id)
127 p = &(*p)->rb_left;
128 else if (new->id > pos->id)
129 p = &(*p)->rb_right;
130 else
131 goto id_exists;
132 }
133 rb_link_node(&new->rb_node, parent, p);
134 rb_insert_color(&new->rb_node, root);
135 return;
136id_exists:
137 for (;;) {
138 new->id++;
139 if (new->id < minval || (new->id & mask) != new->id) {
140 new->id = minval;
141 break;
142 }
143 parent = rb_next(parent);
144 if (parent == NULL)
145 break;
146 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
147 if (new->id < pos->id)
148 break;
149 }
150 goto retry;
151}
152
153static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
154{
155 rb_erase(&id->rb_node, root);
156}
157
119static struct nfs4_state_owner * 158static struct nfs4_state_owner *
120nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) 159nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
121{ 160{
161 struct nfs_client *clp = server->nfs_client;
162 struct rb_node **p = &clp->cl_state_owners.rb_node,
163 *parent = NULL;
122 struct nfs4_state_owner *sp, *res = NULL; 164 struct nfs4_state_owner *sp, *res = NULL;
123 165
124 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 166 while (*p != NULL) {
125 if (sp->so_cred != cred) 167 parent = *p;
168 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
169
170 if (server < sp->so_server) {
171 p = &parent->rb_left;
126 continue; 172 continue;
127 atomic_inc(&sp->so_count); 173 }
128 /* Move to the head of the list */ 174 if (server > sp->so_server) {
129 list_move(&sp->so_list, &clp->cl_state_owners); 175 p = &parent->rb_right;
130 res = sp; 176 continue;
131 break; 177 }
178 if (cred < sp->so_cred)
179 p = &parent->rb_left;
180 else if (cred > sp->so_cred)
181 p = &parent->rb_right;
182 else {
183 atomic_inc(&sp->so_count);
184 res = sp;
185 break;
186 }
132 } 187 }
133 return res; 188 return res;
134} 189}
135 190
191static struct nfs4_state_owner *
192nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
193{
194 struct rb_node **p = &clp->cl_state_owners.rb_node,
195 *parent = NULL;
196 struct nfs4_state_owner *sp;
197
198 while (*p != NULL) {
199 parent = *p;
200 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
201
202 if (new->so_server < sp->so_server) {
203 p = &parent->rb_left;
204 continue;
205 }
206 if (new->so_server > sp->so_server) {
207 p = &parent->rb_right;
208 continue;
209 }
210 if (new->so_cred < sp->so_cred)
211 p = &parent->rb_left;
212 else if (new->so_cred > sp->so_cred)
213 p = &parent->rb_right;
214 else {
215 atomic_inc(&sp->so_count);
216 return sp;
217 }
218 }
219 nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
220 rb_link_node(&new->so_client_node, parent, p);
221 rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
222 return new;
223}
224
225static void
226nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
227{
228 if (!RB_EMPTY_NODE(&sp->so_client_node))
229 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
230 nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
231}
232
136/* 233/*
137 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to 234 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
138 * create a new state_owner. 235 * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
160void 257void
161nfs4_drop_state_owner(struct nfs4_state_owner *sp) 258nfs4_drop_state_owner(struct nfs4_state_owner *sp)
162{ 259{
163 struct nfs_client *clp = sp->so_client; 260 if (!RB_EMPTY_NODE(&sp->so_client_node)) {
164 spin_lock(&clp->cl_lock); 261 struct nfs_client *clp = sp->so_client;
165 list_del_init(&sp->so_list); 262
166 spin_unlock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
264 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
265 RB_CLEAR_NODE(&sp->so_client_node);
266 spin_unlock(&clp->cl_lock);
267 }
167} 268}
168 269
169/* 270/*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
175 struct nfs_client *clp = server->nfs_client; 276 struct nfs_client *clp = server->nfs_client;
176 struct nfs4_state_owner *sp, *new; 277 struct nfs4_state_owner *sp, *new;
177 278
178 get_rpccred(cred);
179 new = nfs4_alloc_state_owner();
180 spin_lock(&clp->cl_lock); 279 spin_lock(&clp->cl_lock);
181 sp = nfs4_find_state_owner(clp, cred); 280 sp = nfs4_find_state_owner(server, cred);
182 if (sp == NULL)
183 sp = nfs4_client_grab_unused(clp, cred);
184 if (sp == NULL && new != NULL) {
185 list_add(&new->so_list, &clp->cl_state_owners);
186 new->so_client = clp;
187 new->so_id = nfs4_alloc_lockowner_id(clp);
188 new->so_cred = cred;
189 sp = new;
190 new = NULL;
191 }
192 spin_unlock(&clp->cl_lock); 281 spin_unlock(&clp->cl_lock);
193 kfree(new);
194 if (sp != NULL) 282 if (sp != NULL)
195 return sp; 283 return sp;
196 put_rpccred(cred); 284 new = nfs4_alloc_state_owner();
197 return NULL; 285 if (new == NULL)
286 return NULL;
287 new->so_client = clp;
288 new->so_server = server;
289 new->so_cred = cred;
290 spin_lock(&clp->cl_lock);
291 sp = nfs4_insert_state_owner(clp, new);
292 spin_unlock(&clp->cl_lock);
293 if (sp == new)
294 get_rpccred(cred);
295 else
296 kfree(new);
297 return sp;
198} 298}
199 299
200/* 300/*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
208 308
209 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 309 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
210 return; 310 return;
211 if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) 311 nfs4_remove_state_owner(clp, sp);
212 goto out_free;
213 if (list_empty(&sp->so_list))
214 goto out_free;
215 list_move(&sp->so_list, &clp->cl_unused);
216 clp->cl_nunused++;
217 spin_unlock(&clp->cl_lock);
218 put_rpccred(cred);
219 cred = NULL;
220 return;
221out_free:
222 list_del(&sp->so_list);
223 spin_unlock(&clp->cl_lock); 312 spin_unlock(&clp->cl_lock);
224 put_rpccred(cred); 313 put_rpccred(cred);
225 kfree(sp); 314 kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
236 atomic_set(&state->count, 1); 325 atomic_set(&state->count, 1);
237 INIT_LIST_HEAD(&state->lock_states); 326 INIT_LIST_HEAD(&state->lock_states);
238 spin_lock_init(&state->state_lock); 327 spin_lock_init(&state->state_lock);
328 seqlock_init(&state->seqlock);
239 return state; 329 return state;
240} 330}
241 331
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
263 struct nfs4_state *state; 353 struct nfs4_state *state;
264 354
265 list_for_each_entry(state, &nfsi->open_states, inode_states) { 355 list_for_each_entry(state, &nfsi->open_states, inode_states) {
266 /* Is this in the process of being freed? */ 356 if (state->owner != owner)
267 if (state->state == 0)
268 continue; 357 continue;
269 if (state->owner == owner) { 358 if (atomic_inc_not_zero(&state->count))
270 atomic_inc(&state->count);
271 return state; 359 return state;
272 }
273 } 360 }
274 return NULL; 361 return NULL;
275} 362}
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
341/* 428/*
342 * Close the current file. 429 * Close the current file.
343 */ 430 */
344void nfs4_close_state(struct nfs4_state *state, mode_t mode) 431void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
345{ 432{
346 struct inode *inode = state->inode;
347 struct nfs4_state_owner *owner = state->owner; 433 struct nfs4_state_owner *owner = state->owner;
348 int oldstate, newstate = 0; 434 int call_close = 0;
435 int newstate;
349 436
350 atomic_inc(&owner->so_count); 437 atomic_inc(&owner->so_count);
351 /* Protect against nfs4_find_state() */ 438 /* Protect against nfs4_find_state() */
352 spin_lock(&owner->so_lock); 439 spin_lock(&owner->so_lock);
353 spin_lock(&inode->i_lock);
354 switch (mode & (FMODE_READ | FMODE_WRITE)) { 440 switch (mode & (FMODE_READ | FMODE_WRITE)) {
355 case FMODE_READ: 441 case FMODE_READ:
356 state->n_rdonly--; 442 state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
361 case FMODE_READ|FMODE_WRITE: 447 case FMODE_READ|FMODE_WRITE:
362 state->n_rdwr--; 448 state->n_rdwr--;
363 } 449 }
364 oldstate = newstate = state->state; 450 newstate = FMODE_READ|FMODE_WRITE;
365 if (state->n_rdwr == 0) { 451 if (state->n_rdwr == 0) {
366 if (state->n_rdonly == 0) 452 if (state->n_rdonly == 0) {
367 newstate &= ~FMODE_READ; 453 newstate &= ~FMODE_READ;
368 if (state->n_wronly == 0) 454 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
455 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
456 }
457 if (state->n_wronly == 0) {
369 newstate &= ~FMODE_WRITE; 458 newstate &= ~FMODE_WRITE;
459 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
460 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
461 }
462 if (newstate == 0)
463 clear_bit(NFS_DELEGATED_STATE, &state->flags);
370 } 464 }
371 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 465 nfs4_state_set_mode_locked(state, newstate);
372 nfs4_state_set_mode_locked(state, newstate);
373 oldstate = newstate;
374 }
375 spin_unlock(&inode->i_lock);
376 spin_unlock(&owner->so_lock); 466 spin_unlock(&owner->so_lock);
377 467
378 if (oldstate != newstate && nfs4_do_close(inode, state) == 0) 468 if (!call_close) {
379 return; 469 nfs4_put_open_state(state);
380 nfs4_put_open_state(state); 470 nfs4_put_state_owner(owner);
381 nfs4_put_state_owner(owner); 471 } else
472 nfs4_do_close(path, state);
382} 473}
383 474
384/* 475/*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
415 atomic_set(&lsp->ls_count, 1); 506 atomic_set(&lsp->ls_count, 1);
416 lsp->ls_owner = fl_owner; 507 lsp->ls_owner = fl_owner;
417 spin_lock(&clp->cl_lock); 508 spin_lock(&clp->cl_lock);
418 lsp->ls_id = nfs4_alloc_lockowner_id(clp); 509 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
419 spin_unlock(&clp->cl_lock); 510 spin_unlock(&clp->cl_lock);
420 INIT_LIST_HEAD(&lsp->ls_locks); 511 INIT_LIST_HEAD(&lsp->ls_locks);
421 return lsp; 512 return lsp;
422} 513}
423 514
515static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
516{
517 struct nfs_client *clp = lsp->ls_state->owner->so_client;
518
519 spin_lock(&clp->cl_lock);
520 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
521 spin_unlock(&clp->cl_lock);
522 kfree(lsp);
523}
524
424/* 525/*
425 * Return a compatible lock_state. If no initialized lock_state structure 526 * Return a compatible lock_state. If no initialized lock_state structure
426 * exists, return an uninitialized one. 527 * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
450 return NULL; 551 return NULL;
451 } 552 }
452 spin_unlock(&state->state_lock); 553 spin_unlock(&state->state_lock);
453 kfree(new); 554 if (new != NULL)
555 nfs4_free_lock_state(new);
454 return lsp; 556 return lsp;
455} 557}
456 558
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
471 if (list_empty(&state->lock_states)) 573 if (list_empty(&state->lock_states))
472 clear_bit(LK_STATE_IN_USE, &state->flags); 574 clear_bit(LK_STATE_IN_USE, &state->flags);
473 spin_unlock(&state->state_lock); 575 spin_unlock(&state->state_lock);
474 kfree(lsp); 576 nfs4_free_lock_state(lsp);
475} 577}
476 578
477static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 579static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
513void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) 615void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
514{ 616{
515 struct nfs4_lock_state *lsp; 617 struct nfs4_lock_state *lsp;
618 int seq;
516 619
517 memcpy(dst, &state->stateid, sizeof(*dst)); 620 do {
621 seq = read_seqbegin(&state->seqlock);
622 memcpy(dst, &state->stateid, sizeof(*dst));
623 } while (read_seqretry(&state->seqlock, seq));
518 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 624 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
519 return; 625 return;
520 626
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
557 * failed with a seqid incrementing error - 663 * failed with a seqid incrementing error -
558 * see comments nfs_fs.h:seqid_mutating_error() 664 * see comments nfs_fs.h:seqid_mutating_error()
559 */ 665 */
560static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) 666static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
561{ 667{
562 switch (status) { 668 switch (status) {
563 case 0: 669 case 0:
564 break; 670 break;
565 case -NFS4ERR_BAD_SEQID: 671 case -NFS4ERR_BAD_SEQID:
672 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
673 return;
674 printk(KERN_WARNING "NFS: v4 server returned a bad"
675 "sequence-id error on an"
676 "unconfirmed sequence %p!\n",
677 seqid->sequence);
566 case -NFS4ERR_STALE_CLIENTID: 678 case -NFS4ERR_STALE_CLIENTID:
567 case -NFS4ERR_STALE_STATEID: 679 case -NFS4ERR_STALE_STATEID:
568 case -NFS4ERR_BAD_STATEID: 680 case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
586 struct nfs4_state_owner, so_seqid); 698 struct nfs4_state_owner, so_seqid);
587 nfs4_drop_state_owner(sp); 699 nfs4_drop_state_owner(sp);
588 } 700 }
589 return nfs_increment_seqid(status, seqid); 701 nfs_increment_seqid(status, seqid);
590} 702}
591 703
592/* 704/*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
596 */ 708 */
597void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) 709void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
598{ 710{
599 return nfs_increment_seqid(status, seqid); 711 nfs_increment_seqid(status, seqid);
600} 712}
601 713
602int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) 714int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
748static void nfs4_state_mark_reclaim(struct nfs_client *clp) 860static void nfs4_state_mark_reclaim(struct nfs_client *clp)
749{ 861{
750 struct nfs4_state_owner *sp; 862 struct nfs4_state_owner *sp;
863 struct rb_node *pos;
751 struct nfs4_state *state; 864 struct nfs4_state *state;
752 struct nfs4_lock_state *lock; 865 struct nfs4_lock_state *lock;
753 866
754 /* Reset all sequence ids to zero */ 867 /* Reset all sequence ids to zero */
755 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 868 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
869 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
756 sp->so_seqid.counter = 0; 870 sp->so_seqid.counter = 0;
757 sp->so_seqid.flags = 0; 871 sp->so_seqid.flags = 0;
758 spin_lock(&sp->so_lock); 872 spin_lock(&sp->so_lock);
759 list_for_each_entry(state, &sp->so_states, open_states) { 873 list_for_each_entry(state, &sp->so_states, open_states) {
874 clear_bit(NFS_DELEGATED_STATE, &state->flags);
875 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
876 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
877 clear_bit(NFS_O_RDWR_STATE, &state->flags);
760 list_for_each_entry(lock, &state->lock_states, ls_locks) { 878 list_for_each_entry(lock, &state->lock_states, ls_locks) {
761 lock->ls_seqid.counter = 0; 879 lock->ls_seqid.counter = 0;
762 lock->ls_seqid.flags = 0; 880 lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
771{ 889{
772 struct nfs_client *clp = ptr; 890 struct nfs_client *clp = ptr;
773 struct nfs4_state_owner *sp; 891 struct nfs4_state_owner *sp;
892 struct rb_node *pos;
774 struct nfs4_state_recovery_ops *ops; 893 struct nfs4_state_recovery_ops *ops;
775 struct rpc_cred *cred; 894 struct rpc_cred *cred;
776 int status = 0; 895 int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
816 /* Mark all delegations for reclaim */ 935 /* Mark all delegations for reclaim */
817 nfs_delegation_mark_reclaim(clp); 936 nfs_delegation_mark_reclaim(clp);
818 /* Note: list is protected by exclusive lock on cl->cl_sem */ 937 /* Note: list is protected by exclusive lock on cl->cl_sem */
819 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 938 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
939 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
820 status = nfs4_reclaim_open_state(ops, sp); 940 status = nfs4_reclaim_open_state(ops, sp);
821 if (status < 0) { 941 if (status < 0) {
822 if (status == -NFS4ERR_NO_GRACE) { 942 if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9a..c08738441f73 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
68#endif 68#endif
69 69
70/* lock,open owner id: 70/* lock,open owner id:
71 * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) 71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
72 */ 72 */
73#define owner_id_maxsz (1 + 1) 73#define open_owner_id_maxsz (1 + 4)
74#define lock_owner_id_maxsz (1 + 4)
74#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 75#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
75#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 76#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
76#define op_encode_hdr_maxsz (1) 77#define op_encode_hdr_maxsz (1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
87#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) 88#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
88#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) 89#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
89#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 90#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
91#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
92#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
90/* This is based on getfattr, which uses the most attributes: */ 93/* This is based on getfattr, which uses the most attributes: */
91#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 94#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
92 3 + 3 + 3 + 2 * nfs4_name_maxsz)) 95 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
93#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 96#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
94 nfs4_fattr_value_maxsz) 97 nfs4_fattr_value_maxsz)
95#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 98#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
116 3 + (NFS4_VERIFIER_SIZE >> 2)) 119 3 + (NFS4_VERIFIER_SIZE >> 2))
117#define decode_setclientid_confirm_maxsz \ 120#define decode_setclientid_confirm_maxsz \
118 (op_decode_hdr_maxsz) 121 (op_decode_hdr_maxsz)
119#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ 122#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
120 1 + ((3 + NFS4_FHSIZE) >> 2)) 123#define decode_lookup_maxsz (op_decode_hdr_maxsz)
124#define encode_share_access_maxsz \
125 (2)
126#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
127#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
128#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
129#define encode_open_maxsz (op_encode_hdr_maxsz + \
130 2 + encode_share_access_maxsz + 2 + \
131 open_owner_id_maxsz + \
132 encode_opentype_maxsz + \
133 encode_claim_null_maxsz)
134#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
135#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
136 decode_ace_maxsz)
137#define decode_change_info_maxsz (5)
138#define decode_open_maxsz (op_decode_hdr_maxsz + \
139 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
140 decode_change_info_maxsz + 1 + \
141 nfs4_fattr_bitmap_maxsz + \
142 decode_delegation_maxsz)
121#define encode_remove_maxsz (op_encode_hdr_maxsz + \ 143#define encode_remove_maxsz (op_encode_hdr_maxsz + \
122 nfs4_name_maxsz) 144 nfs4_name_maxsz)
123#define encode_rename_maxsz (op_encode_hdr_maxsz + \ 145#define encode_rename_maxsz (op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
134#define encode_create_maxsz (op_encode_hdr_maxsz + \ 156#define encode_create_maxsz (op_encode_hdr_maxsz + \
135 2 + nfs4_name_maxsz + \ 157 2 + nfs4_name_maxsz + \
136 nfs4_fattr_maxsz) 158 nfs4_fattr_maxsz)
137#define decode_create_maxsz (op_decode_hdr_maxsz + 8) 159#define decode_create_maxsz (op_decode_hdr_maxsz + \
160 decode_change_info_maxsz + \
161 nfs4_fattr_bitmap_maxsz)
138#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) 162#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
139#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) 163#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
164#define encode_fs_locations_maxsz \
165 (encode_getattr_maxsz)
166#define decode_fs_locations_maxsz \
167 (0)
140#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ 168#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
141#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ 169#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
142#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ 170#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
174 op_decode_hdr_maxsz + 2 + \ 202 op_decode_hdr_maxsz + 2 + \
175 decode_getattr_maxsz) 203 decode_getattr_maxsz)
176#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 204#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
177 encode_putfh_maxsz + \ 205 encode_putfh_maxsz + \
178 op_encode_hdr_maxsz + \ 206 encode_savefh_maxsz + \
179 13 + 3 + 2 + 64 + \ 207 encode_open_maxsz + \
180 encode_getattr_maxsz + \ 208 encode_getfh_maxsz + \
181 encode_getfh_maxsz) 209 encode_getattr_maxsz + \
210 encode_restorefh_maxsz + \
211 encode_getattr_maxsz)
182#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 212#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
183 decode_putfh_maxsz + \ 213 decode_putfh_maxsz + \
184 op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ 214 decode_savefh_maxsz + \
185 decode_getattr_maxsz + \ 215 decode_open_maxsz + \
186 decode_getfh_maxsz) 216 decode_getfh_maxsz + \
217 decode_getattr_maxsz + \
218 decode_restorefh_maxsz + \
219 decode_getattr_maxsz)
187#define NFS4_enc_open_confirm_sz \ 220#define NFS4_enc_open_confirm_sz \
188 (compound_encode_hdr_maxsz + \ 221 (compound_encode_hdr_maxsz + \
189 encode_putfh_maxsz + \ 222 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
193 op_decode_hdr_maxsz + 4) 226 op_decode_hdr_maxsz + 4)
194#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ 227#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
195 encode_putfh_maxsz + \ 228 encode_putfh_maxsz + \
196 op_encode_hdr_maxsz + \ 229 encode_open_maxsz + \
197 11) 230 encode_getattr_maxsz)
198#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 231#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
199 decode_putfh_maxsz + \ 232 decode_putfh_maxsz + \
200 op_decode_hdr_maxsz + \ 233 decode_open_maxsz + \
201 4 + 5 + 2 + 3) 234 decode_getattr_maxsz)
202#define NFS4_enc_open_downgrade_sz \ 235#define NFS4_enc_open_downgrade_sz \
203 (compound_encode_hdr_maxsz + \ 236 (compound_encode_hdr_maxsz + \
204 encode_putfh_maxsz + \ 237 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
256 op_encode_hdr_maxsz + \ 289 op_encode_hdr_maxsz + \
257 1 + 1 + 2 + 2 + \ 290 1 + 1 + 2 + 2 + \
258 1 + 4 + 1 + 2 + \ 291 1 + 4 + 1 + 2 + \
259 owner_id_maxsz) 292 lock_owner_id_maxsz)
260#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ 293#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
261 decode_putfh_maxsz + \ 294 decode_putfh_maxsz + \
262 decode_getattr_maxsz + \ 295 decode_getattr_maxsz + \
263 op_decode_hdr_maxsz + \ 296 op_decode_hdr_maxsz + \
264 2 + 2 + 1 + 2 + \ 297 2 + 2 + 1 + 2 + \
265 owner_id_maxsz) 298 lock_owner_id_maxsz)
266#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ 299#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
267 encode_putfh_maxsz + \ 300 encode_putfh_maxsz + \
268 encode_getattr_maxsz + \ 301 encode_getattr_maxsz + \
269 op_encode_hdr_maxsz + \ 302 op_encode_hdr_maxsz + \
270 1 + 2 + 2 + 2 + \ 303 1 + 2 + 2 + 2 + \
271 owner_id_maxsz) 304 lock_owner_id_maxsz)
272#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) 305#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
273#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ 306#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
274 encode_putfh_maxsz + \ 307 encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
298 encode_getfh_maxsz) 331 encode_getfh_maxsz)
299#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ 332#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
300 decode_putfh_maxsz + \ 333 decode_putfh_maxsz + \
301 op_decode_hdr_maxsz + \ 334 decode_lookup_maxsz + \
302 decode_getattr_maxsz + \ 335 decode_getattr_maxsz + \
303 decode_getfh_maxsz) 336 decode_getfh_maxsz)
304#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ 337#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
417#define NFS4_enc_fs_locations_sz \ 450#define NFS4_enc_fs_locations_sz \
418 (compound_encode_hdr_maxsz + \ 451 (compound_encode_hdr_maxsz + \
419 encode_putfh_maxsz + \ 452 encode_putfh_maxsz + \
420 encode_getattr_maxsz) 453 encode_lookup_maxsz + \
454 encode_fs_locations_maxsz)
421#define NFS4_dec_fs_locations_sz \ 455#define NFS4_dec_fs_locations_sz \
422 (compound_decode_hdr_maxsz + \ 456 (compound_decode_hdr_maxsz + \
423 decode_putfh_maxsz + \ 457 decode_putfh_maxsz + \
424 op_decode_hdr_maxsz + \ 458 decode_lookup_maxsz + \
425 nfs4_fattr_bitmap_maxsz) 459 decode_fs_locations_maxsz)
426 460
427static struct { 461static struct {
428 unsigned int mode; 462 unsigned int mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
793 WRITE64(nfs4_lock_length(args->fl)); 827 WRITE64(nfs4_lock_length(args->fl));
794 WRITE32(args->new_lock_owner); 828 WRITE32(args->new_lock_owner);
795 if (args->new_lock_owner){ 829 if (args->new_lock_owner){
796 RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); 830 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
797 WRITE32(args->open_seqid->sequence->counter); 831 WRITE32(args->open_seqid->sequence->counter);
798 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 832 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
799 WRITE32(args->lock_seqid->sequence->counter); 833 WRITE32(args->lock_seqid->sequence->counter);
800 WRITE64(args->lock_owner.clientid); 834 WRITE64(args->lock_owner.clientid);
801 WRITE32(4); 835 WRITE32(16);
802 WRITE32(args->lock_owner.id); 836 WRITEMEM("lock id:", 8);
837 WRITE64(args->lock_owner.id);
803 } 838 }
804 else { 839 else {
805 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 840 RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
814{ 849{
815 __be32 *p; 850 __be32 *p;
816 851
817 RESERVE_SPACE(40); 852 RESERVE_SPACE(52);
818 WRITE32(OP_LOCKT); 853 WRITE32(OP_LOCKT);
819 WRITE32(nfs4_lock_type(args->fl, 0)); 854 WRITE32(nfs4_lock_type(args->fl, 0));
820 WRITE64(args->fl->fl_start); 855 WRITE64(args->fl->fl_start);
821 WRITE64(nfs4_lock_length(args->fl)); 856 WRITE64(nfs4_lock_length(args->fl));
822 WRITE64(args->lock_owner.clientid); 857 WRITE64(args->lock_owner.clientid);
823 WRITE32(4); 858 WRITE32(16);
824 WRITE32(args->lock_owner.id); 859 WRITEMEM("lock id:", 8);
860 WRITE64(args->lock_owner.id);
825 861
826 return 0; 862 return 0;
827} 863}
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
886 WRITE32(OP_OPEN); 922 WRITE32(OP_OPEN);
887 WRITE32(arg->seqid->sequence->counter); 923 WRITE32(arg->seqid->sequence->counter);
888 encode_share_access(xdr, arg->open_flags); 924 encode_share_access(xdr, arg->open_flags);
889 RESERVE_SPACE(16); 925 RESERVE_SPACE(28);
890 WRITE64(arg->clientid); 926 WRITE64(arg->clientid);
891 WRITE32(4); 927 WRITE32(16);
892 WRITE32(arg->id); 928 WRITEMEM("open id:", 8);
929 WRITE64(arg->id);
893} 930}
894 931
895static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 932static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
1071 1108
1072static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) 1109static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
1073{ 1110{
1074 struct rpc_auth *auth = req->rq_task->tk_auth; 1111 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1075 uint32_t attrs[2] = { 1112 uint32_t attrs[2] = {
1076 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1113 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
1077 FATTR4_WORD1_MOUNTED_ON_FILEID, 1114 FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1117 1154
1118static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) 1155static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
1119{ 1156{
1120 struct rpc_auth *auth = req->rq_task->tk_auth; 1157 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1121 unsigned int replen; 1158 unsigned int replen;
1122 __be32 *p; 1159 __be32 *p;
1123 1160
@@ -1735,7 +1772,7 @@ out:
1735 */ 1772 */
1736static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 1773static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
1737{ 1774{
1738 struct rpc_auth *auth = req->rq_task->tk_auth; 1775 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1739 struct xdr_stream xdr; 1776 struct xdr_stream xdr;
1740 struct compound_hdr hdr = { 1777 struct compound_hdr hdr = {
1741 .nops = 2, 1778 .nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1795 struct nfs_getaclargs *args) 1832 struct nfs_getaclargs *args)
1796{ 1833{
1797 struct xdr_stream xdr; 1834 struct xdr_stream xdr;
1798 struct rpc_auth *auth = req->rq_task->tk_auth; 1835 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1799 struct compound_hdr hdr = { 1836 struct compound_hdr hdr = {
1800 .nops = 2, 1837 .nops = 2,
1801 }; 1838 };
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2030 struct compound_hdr hdr = { 2067 struct compound_hdr hdr = {
2031 .nops = 3, 2068 .nops = 3,
2032 }; 2069 };
2033 struct rpc_auth *auth = req->rq_task->tk_auth; 2070 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
2034 int replen; 2071 int replen;
2035 int status; 2072 int status;
2036 2073
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3269static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3306static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3270{ 3307{
3271 __be32 *p; 3308 __be32 *p;
3272 uint32_t bmlen; 3309 uint32_t savewords, bmlen, i;
3273 int status; 3310 int status;
3274 3311
3275 status = decode_op_hdr(xdr, OP_OPEN); 3312 status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3287 goto xdr_error; 3324 goto xdr_error;
3288 3325
3289 READ_BUF(bmlen << 2); 3326 READ_BUF(bmlen << 2);
3290 p += bmlen; 3327 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3328 for (i = 0; i < savewords; ++i)
3329 READ32(res->attrset[i]);
3330 for (; i < NFS4_BITMAP_SIZE; i++)
3331 res->attrset[i] = 0;
3332
3291 return decode_delegation(xdr, res); 3333 return decode_delegation(xdr, res);
3292xdr_error: 3334xdr_error:
3293 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); 3335 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d7..3490322d1145 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
429 program, version, NIPQUAD(servaddr)); 429 program, version, NIPQUAD(servaddr));
430 set_sockaddr(&sin, servaddr, 0); 430 set_sockaddr(&sin, servaddr, 0);
431 return rpcb_getport_external(&sin, program, version, proto); 431 return rpcb_getport_sync(&sin, program, version, proto);
432} 432}
433 433
434 434
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
496 NFS_MNT3_VERSION : NFS_MNT_VERSION; 496 NFS_MNT3_VERSION : NFS_MNT_VERSION;
497 497
498 set_sockaddr(&sin, servaddr, htons(mount_port)); 498 set_sockaddr(&sin, servaddr, htons(mount_port));
499 status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); 499 status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
500 nfs_path, version, protocol, &fh);
500 if (status < 0) 501 if (status < 0)
501 printk(KERN_ERR "Root-NFS: Server returned error %d " 502 printk(KERN_ERR "Root-NFS: Server returned error %d "
502 "while mounting %s\n", status, nfs_path); 503 "while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e80..f56dae5216f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
85 req->wb_offset = offset; 85 req->wb_offset = offset;
86 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
87 req->wb_bytes = count; 87 req->wb_bytes = count;
88 atomic_set(&req->wb_count, 1);
89 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
90 89 kref_init(&req->wb_kref);
91 return req; 90 return req;
92} 91}
93 92
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
109} 108}
110 109
111/** 110/**
112 * nfs_set_page_writeback_locked - Lock a request for writeback 111 * nfs_set_page_tag_locked - Tag a request as locked
113 * @req: 112 * @req:
114 */ 113 */
115int nfs_set_page_writeback_locked(struct nfs_page *req) 114static int nfs_set_page_tag_locked(struct nfs_page *req)
116{ 115{
117 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 116 struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
118 117
119 if (!nfs_lock_request(req)) 118 if (!nfs_lock_request(req))
120 return 0; 119 return 0;
121 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 120 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
122 return 1; 121 return 1;
123} 122}
124 123
125/** 124/**
126 * nfs_clear_page_writeback - Unlock request and wake up sleepers 125 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
127 */ 126 */
128void nfs_clear_page_writeback(struct nfs_page *req) 127void nfs_clear_page_tag_locked(struct nfs_page *req)
129{ 128{
130 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 129 struct inode *inode = req->wb_context->path.dentry->d_inode;
130 struct nfs_inode *nfsi = NFS_I(inode);
131 131
132 if (req->wb_page != NULL) { 132 if (req->wb_page != NULL) {
133 spin_lock(&nfsi->req_lock); 133 spin_lock(&inode->i_lock);
134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
135 spin_unlock(&nfsi->req_lock); 135 spin_unlock(&inode->i_lock);
136 } 136 }
137 nfs_unlock_request(req); 137 nfs_unlock_request(req);
138} 138}
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
160 * 160 *
161 * Note: Should never be called with the spinlock held! 161 * Note: Should never be called with the spinlock held!
162 */ 162 */
163void 163static void nfs_free_request(struct kref *kref)
164nfs_release_request(struct nfs_page *req)
165{ 164{
166 if (!atomic_dec_and_test(&req->wb_count)) 165 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
167 return;
168 166
169 /* Release struct file or cached credential */ 167 /* Release struct file or cached credential */
170 nfs_clear_request(req); 168 nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
172 nfs_page_free(req); 170 nfs_page_free(req);
173} 171}
174 172
173void nfs_release_request(struct nfs_page *req)
174{
175 kref_put(&req->wb_kref, nfs_free_request);
176}
177
175static int nfs_wait_bit_interruptible(void *word) 178static int nfs_wait_bit_interruptible(void *word)
176{ 179{
177 int ret = 0; 180 int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
193int 196int
194nfs_wait_on_request(struct nfs_page *req) 197nfs_wait_on_request(struct nfs_page *req)
195{ 198{
196 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); 199 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
197 sigset_t oldmask; 200 sigset_t oldmask;
198 int ret = 0; 201 int ret = 0;
199 202
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
379/** 382/**
380 * nfs_scan_list - Scan a list for matching requests 383 * nfs_scan_list - Scan a list for matching requests
381 * @nfsi: NFS inode 384 * @nfsi: NFS inode
382 * @head: One of the NFS inode request lists
383 * @dst: Destination list 385 * @dst: Destination list
384 * @idx_start: lower bound of page->index to scan 386 * @idx_start: lower bound of page->index to scan
385 * @npages: idx_start + npages sets the upper bound to scan. 387 * @npages: idx_start + npages sets the upper bound to scan.
388 * @tag: tag to scan for
386 * 389 *
387 * Moves elements from one of the inode request lists. 390 * Moves elements from one of the inode request lists.
388 * If the number of requests is set to 0, the entire address_space 391 * If the number of requests is set to 0, the entire address_space
389 * starting at index idx_start, is scanned. 392 * starting at index idx_start, is scanned.
390 * The requests are *not* checked to ensure that they form a contiguous set. 393 * The requests are *not* checked to ensure that they form a contiguous set.
391 * You must be holding the inode's req_lock when calling this function 394 * You must be holding the inode's i_lock when calling this function
392 */ 395 */
393int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, 396int nfs_scan_list(struct nfs_inode *nfsi,
394 struct list_head *dst, pgoff_t idx_start, 397 struct list_head *dst, pgoff_t idx_start,
395 unsigned int npages) 398 unsigned int npages, int tag)
396{ 399{
397 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 400 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
398 struct nfs_page *req; 401 struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
407 idx_end = idx_start + npages - 1; 410 idx_end = idx_start + npages - 1;
408 411
409 for (;;) { 412 for (;;) {
410 found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, 413 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
411 (void **)&pgvec[0], idx_start, 414 (void **)&pgvec[0], idx_start,
412 NFS_SCAN_MAXENTRIES); 415 NFS_SCAN_MAXENTRIES, tag);
413 if (found <= 0) 416 if (found <= 0)
414 break; 417 break;
415 for (i = 0; i < found; i++) { 418 for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
417 if (req->wb_index > idx_end) 420 if (req->wb_index > idx_end)
418 goto out; 421 goto out;
419 idx_start = req->wb_index + 1; 422 idx_start = req->wb_index + 1;
420 if (req->wb_list_head != head) 423 if (nfs_set_page_tag_locked(req)) {
421 continue;
422 if (nfs_set_page_writeback_locked(req)) {
423 nfs_list_remove_request(req); 424 nfs_list_remove_request(req);
425 radix_tree_tag_clear(&nfsi->nfs_page_tree,
426 req->wb_index, tag);
424 nfs_list_add_request(req, dst); 427 nfs_list_add_request(req, dst);
425 res++; 428 res++;
429 if (res == INT_MAX)
430 goto out;
426 } 431 }
427 } 432 }
428 433 /* for latency reduction */
434 cond_resched_lock(&nfsi->vfs_inode.i_lock);
429 } 435 }
430out: 436out:
431 return res; 437 return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c034..6ae2e58ed05a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
145 unlock_page(req->wb_page); 145 unlock_page(req->wb_page);
146 146
147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
148 req->wb_context->dentry->d_inode->i_sb->s_id, 148 req->wb_context->path.dentry->d_inode->i_sb->s_id,
149 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 149 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
150 req->wb_bytes, 150 req->wb_bytes,
151 (long long)req_offset(req)); 151 (long long)req_offset(req));
152 nfs_clear_request(req); 152 nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
164 int flags; 164 int flags;
165 165
166 data->req = req; 166 data->req = req;
167 data->inode = inode = req->wb_context->dentry->d_inode; 167 data->inode = inode = req->wb_context->path.dentry->d_inode;
168 data->cred = req->wb_context->cred; 168 data->cred = req->wb_context->cred;
169 169
170 data->args.fh = NFS_FH(inode); 170 data->args.fh = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
483 */ 483 */
484 error = nfs_wb_page(inode, page); 484 error = nfs_wb_page(inode, page);
485 if (error) 485 if (error)
486 goto out_error; 486 goto out_unlock;
487 if (PageUptodate(page))
488 goto out_unlock;
487 489
488 error = -ESTALE; 490 error = -ESTALE;
489 if (NFS_STALE(inode)) 491 if (NFS_STALE(inode))
490 goto out_error; 492 goto out_unlock;
491 493
492 if (file == NULL) { 494 if (file == NULL) {
493 error = -EBADF; 495 error = -EBADF;
494 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 496 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
495 if (ctx == NULL) 497 if (ctx == NULL)
496 goto out_error; 498 goto out_unlock;
497 } else 499 } else
498 ctx = get_nfs_open_context((struct nfs_open_context *) 500 ctx = get_nfs_open_context((struct nfs_open_context *)
499 file->private_data); 501 file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
502 504
503 put_nfs_open_context(ctx); 505 put_nfs_open_context(ctx);
504 return error; 506 return error;
505 507out_unlock:
506out_error:
507 unlock_page(page); 508 unlock_page(page);
508 return error; 509 return error;
509} 510}
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
520 struct inode *inode = page->mapping->host; 521 struct inode *inode = page->mapping->host;
521 struct nfs_page *new; 522 struct nfs_page *new;
522 unsigned int len; 523 unsigned int len;
524 int error;
525
526 error = nfs_wb_page(inode, page);
527 if (error)
528 goto out_unlock;
529 if (PageUptodate(page))
530 goto out_unlock;
523 531
524 nfs_wb_page(inode, page);
525 len = nfs_page_length(page); 532 len = nfs_page_length(page);
526 if (len == 0) 533 if (len == 0)
527 return nfs_return_empty_page(page); 534 return nfs_return_empty_page(page);
535
528 new = nfs_create_request(desc->ctx, inode, page, 0, len); 536 new = nfs_create_request(desc->ctx, inode, page, 0, len);
529 if (IS_ERR(new)) { 537 if (IS_ERR(new))
530 SetPageError(page); 538 goto out_error;
531 unlock_page(page); 539
532 return PTR_ERR(new);
533 }
534 if (len < PAGE_CACHE_SIZE) 540 if (len < PAGE_CACHE_SIZE)
535 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); 541 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
536 nfs_pageio_add_request(desc->pgio, new); 542 nfs_pageio_add_request(desc->pgio, new);
537 return 0; 543 return 0;
544out_error:
545 error = PTR_ERR(new);
546 SetPageError(page);
547out_unlock:
548 unlock_page(page);
549 return error;
538} 550}
539 551
540int nfs_readpages(struct file *filp, struct address_space *mapping, 552int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc2609..a2b1af89ca1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/nfs_xdr.h> 46#include <linux/nfs_xdr.h>
47#include <linux/magic.h> 47#include <linux/magic.h>
48#include <linux/parser.h>
48 49
49#include <asm/system.h> 50#include <asm/system.h>
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
@@ -57,6 +58,167 @@
57 58
58#define NFSDBG_FACILITY NFSDBG_VFS 59#define NFSDBG_FACILITY NFSDBG_VFS
59 60
61
62struct nfs_parsed_mount_data {
63 int flags;
64 int rsize, wsize;
65 int timeo, retrans;
66 int acregmin, acregmax,
67 acdirmin, acdirmax;
68 int namlen;
69 unsigned int bsize;
70 unsigned int auth_flavor_len;
71 rpc_authflavor_t auth_flavors[1];
72 char *client_address;
73
74 struct {
75 struct sockaddr_in address;
76 unsigned int program;
77 unsigned int version;
78 unsigned short port;
79 int protocol;
80 } mount_server;
81
82 struct {
83 struct sockaddr_in address;
84 char *hostname;
85 char *export_path;
86 unsigned int program;
87 int protocol;
88 } nfs_server;
89};
90
91enum {
92 /* Mount options that take no arguments */
93 Opt_soft, Opt_hard,
94 Opt_intr, Opt_nointr,
95 Opt_posix, Opt_noposix,
96 Opt_cto, Opt_nocto,
97 Opt_ac, Opt_noac,
98 Opt_lock, Opt_nolock,
99 Opt_v2, Opt_v3,
100 Opt_udp, Opt_tcp,
101 Opt_acl, Opt_noacl,
102 Opt_rdirplus, Opt_nordirplus,
103 Opt_sharecache, Opt_nosharecache,
104
105 /* Mount options that take integer arguments */
106 Opt_port,
107 Opt_rsize, Opt_wsize, Opt_bsize,
108 Opt_timeo, Opt_retrans,
109 Opt_acregmin, Opt_acregmax,
110 Opt_acdirmin, Opt_acdirmax,
111 Opt_actimeo,
112 Opt_namelen,
113 Opt_mountport,
114 Opt_mountprog, Opt_mountvers,
115 Opt_nfsprog, Opt_nfsvers,
116
117 /* Mount options that take string arguments */
118 Opt_sec, Opt_proto, Opt_mountproto,
119 Opt_addr, Opt_mounthost, Opt_clientaddr,
120
121 /* Mount options that are ignored */
122 Opt_userspace, Opt_deprecated,
123
124 Opt_err
125};
126
127static match_table_t nfs_mount_option_tokens = {
128 { Opt_userspace, "bg" },
129 { Opt_userspace, "fg" },
130 { Opt_soft, "soft" },
131 { Opt_hard, "hard" },
132 { Opt_intr, "intr" },
133 { Opt_nointr, "nointr" },
134 { Opt_posix, "posix" },
135 { Opt_noposix, "noposix" },
136 { Opt_cto, "cto" },
137 { Opt_nocto, "nocto" },
138 { Opt_ac, "ac" },
139 { Opt_noac, "noac" },
140 { Opt_lock, "lock" },
141 { Opt_nolock, "nolock" },
142 { Opt_v2, "v2" },
143 { Opt_v3, "v3" },
144 { Opt_udp, "udp" },
145 { Opt_tcp, "tcp" },
146 { Opt_acl, "acl" },
147 { Opt_noacl, "noacl" },
148 { Opt_rdirplus, "rdirplus" },
149 { Opt_nordirplus, "nordirplus" },
150 { Opt_sharecache, "sharecache" },
151 { Opt_nosharecache, "nosharecache" },
152
153 { Opt_port, "port=%u" },
154 { Opt_rsize, "rsize=%u" },
155 { Opt_wsize, "wsize=%u" },
156 { Opt_bsize, "bsize=%u" },
157 { Opt_timeo, "timeo=%u" },
158 { Opt_retrans, "retrans=%u" },
159 { Opt_acregmin, "acregmin=%u" },
160 { Opt_acregmax, "acregmax=%u" },
161 { Opt_acdirmin, "acdirmin=%u" },
162 { Opt_acdirmax, "acdirmax=%u" },
163 { Opt_actimeo, "actimeo=%u" },
164 { Opt_userspace, "retry=%u" },
165 { Opt_namelen, "namlen=%u" },
166 { Opt_mountport, "mountport=%u" },
167 { Opt_mountprog, "mountprog=%u" },
168 { Opt_mountvers, "mountvers=%u" },
169 { Opt_nfsprog, "nfsprog=%u" },
170 { Opt_nfsvers, "nfsvers=%u" },
171 { Opt_nfsvers, "vers=%u" },
172
173 { Opt_sec, "sec=%s" },
174 { Opt_proto, "proto=%s" },
175 { Opt_mountproto, "mountproto=%s" },
176 { Opt_addr, "addr=%s" },
177 { Opt_clientaddr, "clientaddr=%s" },
178 { Opt_mounthost, "mounthost=%s" },
179
180 { Opt_err, NULL }
181};
182
183enum {
184 Opt_xprt_udp, Opt_xprt_tcp,
185
186 Opt_xprt_err
187};
188
189static match_table_t nfs_xprt_protocol_tokens = {
190 { Opt_xprt_udp, "udp" },
191 { Opt_xprt_tcp, "tcp" },
192
193 { Opt_xprt_err, NULL }
194};
195
196enum {
197 Opt_sec_none, Opt_sec_sys,
198 Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
199 Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
200 Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
201
202 Opt_sec_err
203};
204
205static match_table_t nfs_secflavor_tokens = {
206 { Opt_sec_none, "none" },
207 { Opt_sec_none, "null" },
208 { Opt_sec_sys, "sys" },
209
210 { Opt_sec_krb5, "krb5" },
211 { Opt_sec_krb5i, "krb5i" },
212 { Opt_sec_krb5p, "krb5p" },
213
214 { Opt_sec_lkey, "lkey" },
215 { Opt_sec_lkeyi, "lkeyi" },
216 { Opt_sec_lkeyp, "lkeyp" },
217
218 { Opt_sec_err, NULL }
219};
220
221
60static void nfs_umount_begin(struct vfsmount *, int); 222static void nfs_umount_begin(struct vfsmount *, int);
61static int nfs_statfs(struct dentry *, struct kstatfs *); 223static int nfs_statfs(struct dentry *, struct kstatfs *);
62static int nfs_show_options(struct seq_file *, struct vfsmount *); 224static int nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
263 { RPC_AUTH_GSS_SPKM, "spkm" }, 425 { RPC_AUTH_GSS_SPKM, "spkm" },
264 { RPC_AUTH_GSS_SPKMI, "spkmi" }, 426 { RPC_AUTH_GSS_SPKMI, "spkmi" },
265 { RPC_AUTH_GSS_SPKMP, "spkmp" }, 427 { RPC_AUTH_GSS_SPKMP, "spkmp" },
266 { -1, "unknown" } 428 { UINT_MAX, "unknown" }
267 }; 429 };
268 int i; 430 int i;
269 431
270 for (i=0; sec_flavours[i].flavour != -1; i++) { 432 for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
271 if (sec_flavours[i].flavour == flavour) 433 if (sec_flavours[i].flavour == flavour)
272 break; 434 break;
273 } 435 }
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
291 { NFS_MOUNT_NONLM, ",nolock", "" }, 453 { NFS_MOUNT_NONLM, ",nolock", "" },
292 { NFS_MOUNT_NOACL, ",noacl", "" }, 454 { NFS_MOUNT_NOACL, ",noacl", "" },
293 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, 455 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
456 { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
294 { 0, NULL, NULL } 457 { 0, NULL, NULL }
295 }; 458 };
296 const struct proc_nfs_info *nfs_infop; 459 const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
430 */ 593 */
431static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 594static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
432{ 595{
596 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
597 struct rpc_clnt *rpc;
598
433 shrink_submounts(vfsmnt, &nfs_automount_list); 599 shrink_submounts(vfsmnt, &nfs_automount_list);
600
601 if (!(flags & MNT_FORCE))
602 return;
603 /* -EIO all pending I/O */
604 rpc = server->client_acl;
605 if (!IS_ERR(rpc))
606 rpc_killall_tasks(rpc);
607 rpc = server->client;
608 if (!IS_ERR(rpc))
609 rpc_killall_tasks(rpc);
434} 610}
435 611
436/* 612/*
437 * Validate the NFS2/NFS3 mount data 613 * Sanity-check a server address provided by the mount command
438 * - fills in the mount root filehandle
439 */ 614 */
440static int nfs_validate_mount_data(struct nfs_mount_data *data, 615static int nfs_verify_server_address(struct sockaddr *addr)
441 struct nfs_fh *mntfh)
442{ 616{
443 if (data == NULL) { 617 switch (addr->sa_family) {
444 dprintk("%s: missing data argument\n", __FUNCTION__); 618 case AF_INET: {
445 return -EINVAL; 619 struct sockaddr_in *sa = (struct sockaddr_in *) addr;
620 if (sa->sin_addr.s_addr != INADDR_ANY)
621 return 1;
622 break;
623 }
446 } 624 }
447 625
448 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { 626 return 0;
449 dprintk("%s: bad mount version\n", __FUNCTION__); 627}
450 return -EINVAL; 628
629/*
630 * Error-check and convert a string of mount options from user space into
631 * a data structure
632 */
633static int nfs_parse_mount_options(char *raw,
634 struct nfs_parsed_mount_data *mnt)
635{
636 char *p, *string;
637
638 if (!raw) {
639 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
640 return 1;
451 } 641 }
642 dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
452 643
453 switch (data->version) { 644 while ((p = strsep(&raw, ",")) != NULL) {
454 case 1: 645 substring_t args[MAX_OPT_ARGS];
455 data->namlen = 0; 646 int option, token;
456 case 2: 647
457 data->bsize = 0; 648 if (!*p)
458 case 3: 649 continue;
459 if (data->flags & NFS_MOUNT_VER3) { 650
460 dprintk("%s: mount structure version %d does not support NFSv3\n", 651 dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
461 __FUNCTION__, 652
462 data->version); 653 token = match_token(p, nfs_mount_option_tokens, args);
463 return -EINVAL; 654 switch (token) {
655 case Opt_soft:
656 mnt->flags |= NFS_MOUNT_SOFT;
657 break;
658 case Opt_hard:
659 mnt->flags &= ~NFS_MOUNT_SOFT;
660 break;
661 case Opt_intr:
662 mnt->flags |= NFS_MOUNT_INTR;
663 break;
664 case Opt_nointr:
665 mnt->flags &= ~NFS_MOUNT_INTR;
666 break;
667 case Opt_posix:
668 mnt->flags |= NFS_MOUNT_POSIX;
669 break;
670 case Opt_noposix:
671 mnt->flags &= ~NFS_MOUNT_POSIX;
672 break;
673 case Opt_cto:
674 mnt->flags &= ~NFS_MOUNT_NOCTO;
675 break;
676 case Opt_nocto:
677 mnt->flags |= NFS_MOUNT_NOCTO;
678 break;
679 case Opt_ac:
680 mnt->flags &= ~NFS_MOUNT_NOAC;
681 break;
682 case Opt_noac:
683 mnt->flags |= NFS_MOUNT_NOAC;
684 break;
685 case Opt_lock:
686 mnt->flags &= ~NFS_MOUNT_NONLM;
687 break;
688 case Opt_nolock:
689 mnt->flags |= NFS_MOUNT_NONLM;
690 break;
691 case Opt_v2:
692 mnt->flags &= ~NFS_MOUNT_VER3;
693 break;
694 case Opt_v3:
695 mnt->flags |= NFS_MOUNT_VER3;
696 break;
697 case Opt_udp:
698 mnt->flags &= ~NFS_MOUNT_TCP;
699 mnt->nfs_server.protocol = IPPROTO_UDP;
700 mnt->timeo = 7;
701 mnt->retrans = 5;
702 break;
703 case Opt_tcp:
704 mnt->flags |= NFS_MOUNT_TCP;
705 mnt->nfs_server.protocol = IPPROTO_TCP;
706 mnt->timeo = 600;
707 mnt->retrans = 2;
708 break;
709 case Opt_acl:
710 mnt->flags &= ~NFS_MOUNT_NOACL;
711 break;
712 case Opt_noacl:
713 mnt->flags |= NFS_MOUNT_NOACL;
714 break;
715 case Opt_rdirplus:
716 mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
717 break;
718 case Opt_nordirplus:
719 mnt->flags |= NFS_MOUNT_NORDIRPLUS;
720 break;
721 case Opt_sharecache:
722 mnt->flags &= ~NFS_MOUNT_UNSHARED;
723 break;
724 case Opt_nosharecache:
725 mnt->flags |= NFS_MOUNT_UNSHARED;
726 break;
727
728 case Opt_port:
729 if (match_int(args, &option))
730 return 0;
731 if (option < 0 || option > 65535)
732 return 0;
733 mnt->nfs_server.address.sin_port = htonl(option);
734 break;
735 case Opt_rsize:
736 if (match_int(args, &mnt->rsize))
737 return 0;
738 break;
739 case Opt_wsize:
740 if (match_int(args, &mnt->wsize))
741 return 0;
742 break;
743 case Opt_bsize:
744 if (match_int(args, &option))
745 return 0;
746 if (option < 0)
747 return 0;
748 mnt->bsize = option;
749 break;
750 case Opt_timeo:
751 if (match_int(args, &mnt->timeo))
752 return 0;
753 break;
754 case Opt_retrans:
755 if (match_int(args, &mnt->retrans))
756 return 0;
757 break;
758 case Opt_acregmin:
759 if (match_int(args, &mnt->acregmin))
760 return 0;
761 break;
762 case Opt_acregmax:
763 if (match_int(args, &mnt->acregmax))
764 return 0;
765 break;
766 case Opt_acdirmin:
767 if (match_int(args, &mnt->acdirmin))
768 return 0;
769 break;
770 case Opt_acdirmax:
771 if (match_int(args, &mnt->acdirmax))
772 return 0;
773 break;
774 case Opt_actimeo:
775 if (match_int(args, &option))
776 return 0;
777 if (option < 0)
778 return 0;
779 mnt->acregmin =
780 mnt->acregmax =
781 mnt->acdirmin =
782 mnt->acdirmax = option;
783 break;
784 case Opt_namelen:
785 if (match_int(args, &mnt->namlen))
786 return 0;
787 break;
788 case Opt_mountport:
789 if (match_int(args, &option))
790 return 0;
791 if (option < 0 || option > 65535)
792 return 0;
793 mnt->mount_server.port = option;
794 break;
795 case Opt_mountprog:
796 if (match_int(args, &option))
797 return 0;
798 if (option < 0)
799 return 0;
800 mnt->mount_server.program = option;
801 break;
802 case Opt_mountvers:
803 if (match_int(args, &option))
804 return 0;
805 if (option < 0)
806 return 0;
807 mnt->mount_server.version = option;
808 break;
809 case Opt_nfsprog:
810 if (match_int(args, &option))
811 return 0;
812 if (option < 0)
813 return 0;
814 mnt->nfs_server.program = option;
815 break;
816 case Opt_nfsvers:
817 if (match_int(args, &option))
818 return 0;
819 switch (option) {
820 case 2:
821 mnt->flags &= ~NFS_MOUNT_VER3;
822 break;
823 case 3:
824 mnt->flags |= NFS_MOUNT_VER3;
825 break;
826 default:
827 goto out_unrec_vers;
464 } 828 }
465 data->root.size = NFS2_FHSIZE; 829 break;
466 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); 830
467 case 4: 831 case Opt_sec:
468 if (data->flags & NFS_MOUNT_SECFLAVOUR) { 832 string = match_strdup(args);
469 dprintk("%s: mount structure version %d does not support strong security\n", 833 if (string == NULL)
470 __FUNCTION__, 834 goto out_nomem;
471 data->version); 835 token = match_token(string, nfs_secflavor_tokens, args);
472 return -EINVAL; 836 kfree(string);
837
838 /*
839 * The flags setting is for v2/v3. The flavor_len
840 * setting is for v4. v2/v3 also need to know the
841 * difference between NULL and UNIX.
842 */
843 switch (token) {
844 case Opt_sec_none:
845 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
846 mnt->auth_flavor_len = 0;
847 mnt->auth_flavors[0] = RPC_AUTH_NULL;
848 break;
849 case Opt_sec_sys:
850 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
851 mnt->auth_flavor_len = 0;
852 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
853 break;
854 case Opt_sec_krb5:
855 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
856 mnt->auth_flavor_len = 1;
857 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
858 break;
859 case Opt_sec_krb5i:
860 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
861 mnt->auth_flavor_len = 1;
862 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
863 break;
864 case Opt_sec_krb5p:
865 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
866 mnt->auth_flavor_len = 1;
867 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
868 break;
869 case Opt_sec_lkey:
870 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
871 mnt->auth_flavor_len = 1;
872 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
873 break;
874 case Opt_sec_lkeyi:
875 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
876 mnt->auth_flavor_len = 1;
877 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
878 break;
879 case Opt_sec_lkeyp:
880 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
881 mnt->auth_flavor_len = 1;
882 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
883 break;
884 case Opt_sec_spkm:
885 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
886 mnt->auth_flavor_len = 1;
887 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
888 break;
889 case Opt_sec_spkmi:
890 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
891 mnt->auth_flavor_len = 1;
892 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
893 break;
894 case Opt_sec_spkmp:
895 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
896 mnt->auth_flavor_len = 1;
897 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
898 break;
899 default:
900 goto out_unrec_sec;
473 } 901 }
474 case 5: 902 break;
475 memset(data->context, 0, sizeof(data->context)); 903 case Opt_proto:
476 } 904 string = match_strdup(args);
905 if (string == NULL)
906 goto out_nomem;
907 token = match_token(string,
908 nfs_xprt_protocol_tokens, args);
909 kfree(string);
910
911 switch (token) {
912 case Opt_udp:
913 mnt->flags &= ~NFS_MOUNT_TCP;
914 mnt->nfs_server.protocol = IPPROTO_UDP;
915 mnt->timeo = 7;
916 mnt->retrans = 5;
917 break;
918 case Opt_tcp:
919 mnt->flags |= NFS_MOUNT_TCP;
920 mnt->nfs_server.protocol = IPPROTO_TCP;
921 mnt->timeo = 600;
922 mnt->retrans = 2;
923 break;
924 default:
925 goto out_unrec_xprt;
926 }
927 break;
928 case Opt_mountproto:
929 string = match_strdup(args);
930 if (string == NULL)
931 goto out_nomem;
932 token = match_token(string,
933 nfs_xprt_protocol_tokens, args);
934 kfree(string);
935
936 switch (token) {
937 case Opt_udp:
938 mnt->mount_server.protocol = IPPROTO_UDP;
939 break;
940 case Opt_tcp:
941 mnt->mount_server.protocol = IPPROTO_TCP;
942 break;
943 default:
944 goto out_unrec_xprt;
945 }
946 break;
947 case Opt_addr:
948 string = match_strdup(args);
949 if (string == NULL)
950 goto out_nomem;
951 mnt->nfs_server.address.sin_family = AF_INET;
952 mnt->nfs_server.address.sin_addr.s_addr =
953 in_aton(string);
954 kfree(string);
955 break;
956 case Opt_clientaddr:
957 string = match_strdup(args);
958 if (string == NULL)
959 goto out_nomem;
960 mnt->client_address = string;
961 break;
962 case Opt_mounthost:
963 string = match_strdup(args);
964 if (string == NULL)
965 goto out_nomem;
966 mnt->mount_server.address.sin_family = AF_INET;
967 mnt->mount_server.address.sin_addr.s_addr =
968 in_aton(string);
969 kfree(string);
970 break;
477 971
478 /* Set the pseudoflavor */ 972 case Opt_userspace:
479 if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) 973 case Opt_deprecated:
480 data->pseudoflavor = RPC_AUTH_UNIX; 974 break;
481 975
482#ifndef CONFIG_NFS_V3 976 default:
483 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ 977 goto out_unknown;
484 if (data->flags & NFS_MOUNT_VER3) { 978 }
485 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
486 return -EPROTONOSUPPORT;
487 } 979 }
488#endif /* CONFIG_NFS_V3 */
489 980
490 /* We now require that the mount process passes the remote address */ 981 return 1;
491 if (data->addr.sin_addr.s_addr == INADDR_ANY) { 982
492 dprintk("%s: mount program didn't pass remote address!\n", 983out_nomem:
493 __FUNCTION__); 984 printk(KERN_INFO "NFS: not enough memory to parse option\n");
494 return -EINVAL; 985 return 0;
986
987out_unrec_vers:
988 printk(KERN_INFO "NFS: unrecognized NFS version number\n");
989 return 0;
990
991out_unrec_xprt:
992 printk(KERN_INFO "NFS: unrecognized transport protocol\n");
993 return 0;
994
995out_unrec_sec:
996 printk(KERN_INFO "NFS: unrecognized security flavor\n");
997 return 0;
998
999out_unknown:
1000 printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
1001 return 0;
1002}
1003
1004/*
1005 * Use the remote server's MOUNT service to request the NFS file handle
1006 * corresponding to the provided path.
1007 */
1008static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1009 struct nfs_fh *root_fh)
1010{
1011 struct sockaddr_in sin;
1012 int status;
1013
1014 if (args->mount_server.version == 0) {
1015 if (args->flags & NFS_MOUNT_VER3)
1016 args->mount_server.version = NFS_MNT3_VERSION;
1017 else
1018 args->mount_server.version = NFS_MNT_VERSION;
495 } 1019 }
496 1020
497 /* Prepare the root filehandle */ 1021 /*
498 if (data->flags & NFS_MOUNT_VER3) 1022 * Construct the mount server's address.
499 mntfh->size = data->root.size; 1023 */
1024 if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
1025 sin = args->mount_server.address;
500 else 1026 else
501 mntfh->size = NFS2_FHSIZE; 1027 sin = args->nfs_server.address;
1028 if (args->mount_server.port == 0) {
1029 status = rpcb_getport_sync(&sin,
1030 args->mount_server.program,
1031 args->mount_server.version,
1032 args->mount_server.protocol);
1033 if (status < 0)
1034 goto out_err;
1035 sin.sin_port = htons(status);
1036 } else
1037 sin.sin_port = htons(args->mount_server.port);
1038
1039 /*
1040 * Now ask the mount server to map our export path
1041 * to a file handle.
1042 */
1043 status = nfs_mount((struct sockaddr *) &sin,
1044 sizeof(sin),
1045 args->nfs_server.hostname,
1046 args->nfs_server.export_path,
1047 args->mount_server.version,
1048 args->mount_server.protocol,
1049 root_fh);
1050 if (status < 0)
1051 goto out_err;
1052
1053 return status;
502 1054
503 if (mntfh->size > sizeof(mntfh->data)) { 1055out_err:
504 dprintk("%s: invalid root filehandle\n", __FUNCTION__); 1056 dfprintk(MOUNT, "NFS: unable to contact server on host "
505 return -EINVAL; 1057 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
1058 return status;
1059}
1060
1061/*
1062 * Validate the NFS2/NFS3 mount data
1063 * - fills in the mount root filehandle
1064 *
1065 * For option strings, user space handles the following behaviors:
1066 *
1067 * + DNS: mapping server host name to IP address ("addr=" option)
1068 *
1069 * + failure mode: how to behave if a mount request can't be handled
1070 * immediately ("fg/bg" option)
1071 *
1072 * + retry: how often to retry a mount request ("retry=" option)
1073 *
1074 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1075 * mountproto=tcp after mountproto=udp, and so on
1076 *
1077 * XXX: as far as I can tell, changing the NFS program number is not
1078 * supported in the NFS client.
1079 */
1080static int nfs_validate_mount_data(struct nfs_mount_data **options,
1081 struct nfs_fh *mntfh,
1082 const char *dev_name)
1083{
1084 struct nfs_mount_data *data = *options;
1085
1086 if (data == NULL)
1087 goto out_no_data;
1088
1089 switch (data->version) {
1090 case 1:
1091 data->namlen = 0;
1092 case 2:
1093 data->bsize = 0;
1094 case 3:
1095 if (data->flags & NFS_MOUNT_VER3)
1096 goto out_no_v3;
1097 data->root.size = NFS2_FHSIZE;
1098 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
1099 case 4:
1100 if (data->flags & NFS_MOUNT_SECFLAVOUR)
1101 goto out_no_sec;
1102 case 5:
1103 memset(data->context, 0, sizeof(data->context));
1104 case 6:
1105 if (data->flags & NFS_MOUNT_VER3)
1106 mntfh->size = data->root.size;
1107 else
1108 mntfh->size = NFS2_FHSIZE;
1109
1110 if (mntfh->size > sizeof(mntfh->data))
1111 goto out_invalid_fh;
1112
1113 memcpy(mntfh->data, data->root.data, mntfh->size);
1114 if (mntfh->size < sizeof(mntfh->data))
1115 memset(mntfh->data + mntfh->size, 0,
1116 sizeof(mntfh->data) - mntfh->size);
1117 break;
1118 default: {
1119 unsigned int len;
1120 char *c;
1121 int status;
1122 struct nfs_parsed_mount_data args = {
1123 .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
1124 .rsize = NFS_MAX_FILE_IO_SIZE,
1125 .wsize = NFS_MAX_FILE_IO_SIZE,
1126 .timeo = 600,
1127 .retrans = 2,
1128 .acregmin = 3,
1129 .acregmax = 60,
1130 .acdirmin = 30,
1131 .acdirmax = 60,
1132 .mount_server.protocol = IPPROTO_UDP,
1133 .mount_server.program = NFS_MNT_PROGRAM,
1134 .nfs_server.protocol = IPPROTO_TCP,
1135 .nfs_server.program = NFS_PROGRAM,
1136 };
1137
1138 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1139 return -EINVAL;
1140
1141 data = kzalloc(sizeof(*data), GFP_KERNEL);
1142 if (data == NULL)
1143 return -ENOMEM;
1144
1145 /*
1146 * NB: after this point, caller will free "data"
1147 * if we return an error
1148 */
1149 *options = data;
1150
1151 c = strchr(dev_name, ':');
1152 if (c == NULL)
1153 return -EINVAL;
1154 len = c - dev_name - 1;
1155 if (len > sizeof(data->hostname))
1156 return -EINVAL;
1157 strncpy(data->hostname, dev_name, len);
1158 args.nfs_server.hostname = data->hostname;
1159
1160 c++;
1161 if (strlen(c) > NFS_MAXPATHLEN)
1162 return -EINVAL;
1163 args.nfs_server.export_path = c;
1164
1165 status = nfs_try_mount(&args, mntfh);
1166 if (status)
1167 return -EINVAL;
1168
1169 /*
1170 * Translate to nfs_mount_data, which nfs_fill_super
1171 * can deal with.
1172 */
1173 data->version = 6;
1174 data->flags = args.flags;
1175 data->rsize = args.rsize;
1176 data->wsize = args.wsize;
1177 data->timeo = args.timeo;
1178 data->retrans = args.retrans;
1179 data->acregmin = args.acregmin;
1180 data->acregmax = args.acregmax;
1181 data->acdirmin = args.acdirmin;
1182 data->acdirmax = args.acdirmax;
1183 data->addr = args.nfs_server.address;
1184 data->namlen = args.namlen;
1185 data->bsize = args.bsize;
1186 data->pseudoflavor = args.auth_flavors[0];
1187
1188 break;
1189 }
506 } 1190 }
507 1191
508 memcpy(mntfh->data, data->root.data, mntfh->size); 1192 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
509 if (mntfh->size < sizeof(mntfh->data)) 1193 data->pseudoflavor = RPC_AUTH_UNIX;
510 memset(mntfh->data + mntfh->size, 0, 1194
511 sizeof(mntfh->data) - mntfh->size); 1195#ifndef CONFIG_NFS_V3
1196 if (data->flags & NFS_MOUNT_VER3)
1197 goto out_v3_not_compiled;
1198#endif /* !CONFIG_NFS_V3 */
1199
1200 if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
1201 goto out_no_address;
512 1202
513 return 0; 1203 return 0;
1204
1205out_no_data:
1206 dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
1207 return -EINVAL;
1208
1209out_no_v3:
1210 dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
1211 data->version);
1212 return -EINVAL;
1213
1214out_no_sec:
1215 dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
1216 return -EINVAL;
1217
1218#ifndef CONFIG_NFS_V3
1219out_v3_not_compiled:
1220 dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
1221 return -EPROTONOSUPPORT;
1222#endif /* !CONFIG_NFS_V3 */
1223
1224out_no_address:
1225 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
1226 return -EINVAL;
1227
1228out_invalid_fh:
1229 dfprintk(MOUNT, "NFS: invalid root filehandle\n");
1230 return -EINVAL;
514} 1231}
515 1232
516/* 1233/*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
600{ 1317{
601 struct nfs_server *server = data, *old = NFS_SB(sb); 1318 struct nfs_server *server = data, *old = NFS_SB(sb);
602 1319
603 if (old->nfs_client != server->nfs_client) 1320 if (memcmp(&old->nfs_client->cl_addr,
1321 &server->nfs_client->cl_addr,
1322 sizeof(old->nfs_client->cl_addr)) != 0)
1323 return 0;
1324 /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
1325 if (old->flags & NFS_MOUNT_UNSHARED)
604 return 0; 1326 return 0;
605 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) 1327 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
606 return 0; 1328 return 0;
607 return 1; 1329 return 1;
608} 1330}
609 1331
1332#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
1333
1334static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
1335{
1336 const struct nfs_server *a = s->s_fs_info;
1337 const struct rpc_clnt *clnt_a = a->client;
1338 const struct rpc_clnt *clnt_b = b->client;
1339
1340 if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
1341 goto Ebusy;
1342 if (a->nfs_client != b->nfs_client)
1343 goto Ebusy;
1344 if (a->flags != b->flags)
1345 goto Ebusy;
1346 if (a->wsize != b->wsize)
1347 goto Ebusy;
1348 if (a->rsize != b->rsize)
1349 goto Ebusy;
1350 if (a->acregmin != b->acregmin)
1351 goto Ebusy;
1352 if (a->acregmax != b->acregmax)
1353 goto Ebusy;
1354 if (a->acdirmin != b->acdirmin)
1355 goto Ebusy;
1356 if (a->acdirmax != b->acdirmax)
1357 goto Ebusy;
1358 if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
1359 goto Ebusy;
1360 return 0;
1361Ebusy:
1362 return -EBUSY;
1363}
1364
610static int nfs_get_sb(struct file_system_type *fs_type, 1365static int nfs_get_sb(struct file_system_type *fs_type,
611 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1366 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
612{ 1367{
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
615 struct nfs_fh mntfh; 1370 struct nfs_fh mntfh;
616 struct nfs_mount_data *data = raw_data; 1371 struct nfs_mount_data *data = raw_data;
617 struct dentry *mntroot; 1372 struct dentry *mntroot;
1373 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
618 int error; 1374 int error;
619 1375
620 /* Validate the mount data */ 1376 /* Validate the mount data */
621 error = nfs_validate_mount_data(data, &mntfh); 1377 error = nfs_validate_mount_data(&data, &mntfh, dev_name);
622 if (error < 0) 1378 if (error < 0)
623 return error; 1379 goto out;
624 1380
625 /* Get a volume representation */ 1381 /* Get a volume representation */
626 server = nfs_create_server(data, &mntfh); 1382 server = nfs_create_server(data, &mntfh);
627 if (IS_ERR(server)) { 1383 if (IS_ERR(server)) {
628 error = PTR_ERR(server); 1384 error = PTR_ERR(server);
629 goto out_err_noserver; 1385 goto out;
630 } 1386 }
631 1387
1388 if (server->flags & NFS_MOUNT_UNSHARED)
1389 compare_super = NULL;
1390
632 /* Get a superblock - note that we may end up sharing one that already exists */ 1391 /* Get a superblock - note that we may end up sharing one that already exists */
633 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1392 s = sget(fs_type, compare_super, nfs_set_super, server);
634 if (IS_ERR(s)) { 1393 if (IS_ERR(s)) {
635 error = PTR_ERR(s); 1394 error = PTR_ERR(s);
636 goto out_err_nosb; 1395 goto out_err_nosb;
637 } 1396 }
638 1397
639 if (s->s_fs_info != server) { 1398 if (s->s_fs_info != server) {
1399 error = nfs_compare_mount_options(s, server, flags);
640 nfs_free_server(server); 1400 nfs_free_server(server);
641 server = NULL; 1401 server = NULL;
1402 if (error < 0)
1403 goto error_splat_super;
642 } 1404 }
643 1405
644 if (!s->s_root) { 1406 if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
656 s->s_flags |= MS_ACTIVE; 1418 s->s_flags |= MS_ACTIVE;
657 mnt->mnt_sb = s; 1419 mnt->mnt_sb = s;
658 mnt->mnt_root = mntroot; 1420 mnt->mnt_root = mntroot;
659 return 0; 1421 error = 0;
1422
1423out:
1424 if (data != raw_data)
1425 kfree(data);
1426 return error;
660 1427
661out_err_nosb: 1428out_err_nosb:
662 nfs_free_server(server); 1429 nfs_free_server(server);
663out_err_noserver: 1430 goto out;
664 return error;
665 1431
666error_splat_super: 1432error_splat_super:
667 up_write(&s->s_umount); 1433 up_write(&s->s_umount);
668 deactivate_super(s); 1434 deactivate_super(s);
669 return error; 1435 goto out;
670} 1436}
671 1437
672/* 1438/*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
691 struct super_block *s; 1457 struct super_block *s;
692 struct nfs_server *server; 1458 struct nfs_server *server;
693 struct dentry *mntroot; 1459 struct dentry *mntroot;
1460 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
694 int error; 1461 int error;
695 1462
696 dprintk("--> nfs_xdev_get_sb()\n"); 1463 dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
702 goto out_err_noserver; 1469 goto out_err_noserver;
703 } 1470 }
704 1471
1472 if (server->flags & NFS_MOUNT_UNSHARED)
1473 compare_super = NULL;
1474
705 /* Get a superblock - note that we may end up sharing one that already exists */ 1475 /* Get a superblock - note that we may end up sharing one that already exists */
706 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1476 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
707 if (IS_ERR(s)) { 1477 if (IS_ERR(s)) {
708 error = PTR_ERR(s); 1478 error = PTR_ERR(s);
709 goto out_err_nosb; 1479 goto out_err_nosb;
710 } 1480 }
711 1481
712 if (s->s_fs_info != server) { 1482 if (s->s_fs_info != server) {
1483 error = nfs_compare_mount_options(s, server, flags);
713 nfs_free_server(server); 1484 nfs_free_server(server);
714 server = NULL; 1485 server = NULL;
1486 if (error < 0)
1487 goto error_splat_super;
715 } 1488 }
716 1489
717 if (!s->s_root) { 1490 if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
772 nfs_initialise_sb(sb); 1545 nfs_initialise_sb(sb);
773} 1546}
774 1547
775static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) 1548/*
1549 * Validate NFSv4 mount options
1550 */
1551static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
1552 const char *dev_name,
1553 struct sockaddr_in *addr,
1554 rpc_authflavor_t *authflavour,
1555 char **hostname,
1556 char **mntpath,
1557 char **ip_addr)
776{ 1558{
777 void *p = NULL; 1559 struct nfs4_mount_data *data = *options;
778 1560 char *c;
779 if (!src->len) 1561
780 return ERR_PTR(-EINVAL); 1562 if (data == NULL)
781 if (src->len < maxlen) 1563 goto out_no_data;
782 maxlen = src->len; 1564
783 if (dst == NULL) { 1565 switch (data->version) {
784 p = dst = kmalloc(maxlen + 1, GFP_KERNEL); 1566 case 1:
785 if (p == NULL) 1567 if (data->host_addrlen != sizeof(*addr))
786 return ERR_PTR(-ENOMEM); 1568 goto out_no_address;
787 } 1569 if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
788 if (copy_from_user(dst, src->data, maxlen)) { 1570 return -EFAULT;
789 kfree(p); 1571 if (addr->sin_port == 0)
790 return ERR_PTR(-EFAULT); 1572 addr->sin_port = htons(NFS_PORT);
1573 if (!nfs_verify_server_address((struct sockaddr *) addr))
1574 goto out_no_address;
1575
1576 switch (data->auth_flavourlen) {
1577 case 0:
1578 *authflavour = RPC_AUTH_UNIX;
1579 break;
1580 case 1:
1581 if (copy_from_user(authflavour, data->auth_flavours,
1582 sizeof(*authflavour)))
1583 return -EFAULT;
1584 break;
1585 default:
1586 goto out_inval_auth;
1587 }
1588
1589 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
1590 if (IS_ERR(c))
1591 return PTR_ERR(c);
1592 *hostname = c;
1593
1594 c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
1595 if (IS_ERR(c))
1596 return PTR_ERR(c);
1597 *mntpath = c;
1598 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
1599
1600 c = strndup_user(data->client_addr.data, 16);
1601 if (IS_ERR(c))
1602 return PTR_ERR(c);
1603 *ip_addr = c;
1604
1605 break;
1606 default: {
1607 unsigned int len;
1608 struct nfs_parsed_mount_data args = {
1609 .rsize = NFS_MAX_FILE_IO_SIZE,
1610 .wsize = NFS_MAX_FILE_IO_SIZE,
1611 .timeo = 600,
1612 .retrans = 2,
1613 .acregmin = 3,
1614 .acregmax = 60,
1615 .acdirmin = 30,
1616 .acdirmax = 60,
1617 .nfs_server.protocol = IPPROTO_TCP,
1618 };
1619
1620 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1621 return -EINVAL;
1622
1623 if (!nfs_verify_server_address((struct sockaddr *)
1624 &args.nfs_server.address))
1625 return -EINVAL;
1626 *addr = args.nfs_server.address;
1627
1628 switch (args.auth_flavor_len) {
1629 case 0:
1630 *authflavour = RPC_AUTH_UNIX;
1631 break;
1632 case 1:
1633 *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
1634 break;
1635 default:
1636 goto out_inval_auth;
1637 }
1638
1639 /*
1640 * Translate to nfs4_mount_data, which nfs4_fill_super
1641 * can deal with.
1642 */
1643 data = kzalloc(sizeof(*data), GFP_KERNEL);
1644 if (data == NULL)
1645 return -ENOMEM;
1646 *options = data;
1647
1648 data->version = 1;
1649 data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
1650 data->rsize = args.rsize;
1651 data->wsize = args.wsize;
1652 data->timeo = args.timeo;
1653 data->retrans = args.retrans;
1654 data->acregmin = args.acregmin;
1655 data->acregmax = args.acregmax;
1656 data->acdirmin = args.acdirmin;
1657 data->acdirmax = args.acdirmax;
1658 data->proto = args.nfs_server.protocol;
1659
1660 /*
1661 * Split "dev_name" into "hostname:mntpath".
1662 */
1663 c = strchr(dev_name, ':');
1664 if (c == NULL)
1665 return -EINVAL;
1666 /* while calculating len, pretend ':' is '\0' */
1667 len = c - dev_name;
1668 if (len > NFS4_MAXNAMLEN)
1669 return -EINVAL;
1670 *hostname = kzalloc(len, GFP_KERNEL);
1671 if (*hostname == NULL)
1672 return -ENOMEM;
1673 strncpy(*hostname, dev_name, len - 1);
1674
1675 c++; /* step over the ':' */
1676 len = strlen(c);
1677 if (len > NFS4_MAXPATHLEN)
1678 return -EINVAL;
1679 *mntpath = kzalloc(len + 1, GFP_KERNEL);
1680 if (*mntpath == NULL)
1681 return -ENOMEM;
1682 strncpy(*mntpath, c, len);
1683
1684 dprintk("MNTPATH: %s\n", *mntpath);
1685
1686 *ip_addr = args.client_address;
1687
1688 break;
1689 }
791 } 1690 }
792 dst[maxlen] = '\0'; 1691
793 return dst; 1692 return 0;
1693
1694out_no_data:
1695 dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
1696 return -EINVAL;
1697
1698out_inval_auth:
1699 dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
1700 data->auth_flavourlen);
1701 return -EINVAL;
1702
1703out_no_address:
1704 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
1705 return -EINVAL;
794} 1706}
795 1707
796/* 1708/*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
806 rpc_authflavor_t authflavour; 1718 rpc_authflavor_t authflavour;
807 struct nfs_fh mntfh; 1719 struct nfs_fh mntfh;
808 struct dentry *mntroot; 1720 struct dentry *mntroot;
809 char *mntpath = NULL, *hostname = NULL, ip_addr[16]; 1721 char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
810 void *p; 1722 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
811 int error; 1723 int error;
812 1724
813 if (data == NULL) { 1725 /* Validate the mount data */
814 dprintk("%s: missing data argument\n", __FUNCTION__); 1726 error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
815 return -EINVAL; 1727 &hostname, &mntpath, &ip_addr);
816 } 1728 if (error < 0)
817 if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { 1729 goto out;
818 dprintk("%s: bad mount version\n", __FUNCTION__);
819 return -EINVAL;
820 }
821
822 /* We now require that the mount process passes the remote address */
823 if (data->host_addrlen != sizeof(addr))
824 return -EINVAL;
825
826 if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
827 return -EFAULT;
828
829 if (addr.sin_family != AF_INET ||
830 addr.sin_addr.s_addr == INADDR_ANY
831 ) {
832 dprintk("%s: mount program didn't pass remote IP address!\n",
833 __FUNCTION__);
834 return -EINVAL;
835 }
836 /* RFC3530: The default port for NFS is 2049 */
837 if (addr.sin_port == 0)
838 addr.sin_port = htons(NFS_PORT);
839
840 /* Grab the authentication type */
841 authflavour = RPC_AUTH_UNIX;
842 if (data->auth_flavourlen != 0) {
843 if (data->auth_flavourlen != 1) {
844 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
845 __FUNCTION__, data->auth_flavourlen);
846 error = -EINVAL;
847 goto out_err_noserver;
848 }
849
850 if (copy_from_user(&authflavour, data->auth_flavours,
851 sizeof(authflavour))) {
852 error = -EFAULT;
853 goto out_err_noserver;
854 }
855 }
856
857 p = nfs_copy_user_string(NULL, &data->hostname, 256);
858 if (IS_ERR(p))
859 goto out_err;
860 hostname = p;
861
862 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
863 if (IS_ERR(p))
864 goto out_err;
865 mntpath = p;
866
867 dprintk("MNTPATH: %s\n", mntpath);
868
869 p = nfs_copy_user_string(ip_addr, &data->client_addr,
870 sizeof(ip_addr) - 1);
871 if (IS_ERR(p))
872 goto out_err;
873 1730
874 /* Get a volume representation */ 1731 /* Get a volume representation */
875 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, 1732 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
876 authflavour, &mntfh); 1733 authflavour, &mntfh);
877 if (IS_ERR(server)) { 1734 if (IS_ERR(server)) {
878 error = PTR_ERR(server); 1735 error = PTR_ERR(server);
879 goto out_err_noserver; 1736 goto out;
880 } 1737 }
881 1738
1739 if (server->flags & NFS4_MOUNT_UNSHARED)
1740 compare_super = NULL;
1741
882 /* Get a superblock - note that we may end up sharing one that already exists */ 1742 /* Get a superblock - note that we may end up sharing one that already exists */
883 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1743 s = sget(fs_type, compare_super, nfs_set_super, server);
884 if (IS_ERR(s)) { 1744 if (IS_ERR(s)) {
885 error = PTR_ERR(s); 1745 error = PTR_ERR(s);
886 goto out_free; 1746 goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
906 s->s_flags |= MS_ACTIVE; 1766 s->s_flags |= MS_ACTIVE;
907 mnt->mnt_sb = s; 1767 mnt->mnt_sb = s;
908 mnt->mnt_root = mntroot; 1768 mnt->mnt_root = mntroot;
1769 error = 0;
1770
1771out:
1772 kfree(ip_addr);
909 kfree(mntpath); 1773 kfree(mntpath);
910 kfree(hostname); 1774 kfree(hostname);
911 return 0; 1775 return error;
912
913out_err:
914 error = PTR_ERR(p);
915 goto out_err_noserver;
916 1776
917out_free: 1777out_free:
918 nfs_free_server(server); 1778 nfs_free_server(server);
919out_err_noserver: 1779 goto out;
920 kfree(mntpath);
921 kfree(hostname);
922 return error;
923 1780
924error_splat_super: 1781error_splat_super:
925 up_write(&s->s_umount); 1782 up_write(&s->s_umount);
926 deactivate_super(s); 1783 deactivate_super(s);
927 goto out_err_noserver; 1784 goto out;
928} 1785}
929 1786
930static void nfs4_kill_super(struct super_block *sb) 1787static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
949 struct super_block *s; 1806 struct super_block *s;
950 struct nfs_server *server; 1807 struct nfs_server *server;
951 struct dentry *mntroot; 1808 struct dentry *mntroot;
1809 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
952 int error; 1810 int error;
953 1811
954 dprintk("--> nfs4_xdev_get_sb()\n"); 1812 dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
960 goto out_err_noserver; 1818 goto out_err_noserver;
961 } 1819 }
962 1820
1821 if (server->flags & NFS4_MOUNT_UNSHARED)
1822 compare_super = NULL;
1823
963 /* Get a superblock - note that we may end up sharing one that already exists */ 1824 /* Get a superblock - note that we may end up sharing one that already exists */
964 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1825 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
965 if (IS_ERR(s)) { 1826 if (IS_ERR(s)) {
966 error = PTR_ERR(s); 1827 error = PTR_ERR(s);
967 goto out_err_nosb; 1828 goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1016 struct nfs_server *server; 1877 struct nfs_server *server;
1017 struct dentry *mntroot; 1878 struct dentry *mntroot;
1018 struct nfs_fh mntfh; 1879 struct nfs_fh mntfh;
1880 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1019 int error; 1881 int error;
1020 1882
1021 dprintk("--> nfs4_referral_get_sb()\n"); 1883 dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1027 goto out_err_noserver; 1889 goto out_err_noserver;
1028 } 1890 }
1029 1891
1892 if (server->flags & NFS4_MOUNT_UNSHARED)
1893 compare_super = NULL;
1894
1030 /* Get a superblock - note that we may end up sharing one that already exists */ 1895 /* Get a superblock - note that we may end up sharing one that already exists */
1031 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1896 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
1032 if (IS_ERR(s)) { 1897 if (IS_ERR(s)) {
1033 error = PTR_ERR(s); 1898 error = PTR_ERR(s);
1034 goto out_err_nosb; 1899 goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e01..73ac992ece85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
117 if (PagePrivate(page)) { 117 if (PagePrivate(page)) {
118 req = (struct nfs_page *)page_private(page); 118 req = (struct nfs_page *)page_private(page);
119 if (req != NULL) 119 if (req != NULL)
120 atomic_inc(&req->wb_count); 120 kref_get(&req->wb_kref);
121 } 121 }
122 return req; 122 return req;
123} 123}
124 124
125static struct nfs_page *nfs_page_find_request(struct page *page) 125static struct nfs_page *nfs_page_find_request(struct page *page)
126{ 126{
127 struct inode *inode = page->mapping->host;
127 struct nfs_page *req = NULL; 128 struct nfs_page *req = NULL;
128 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
129 129
130 spin_lock(req_lock); 130 spin_lock(&inode->i_lock);
131 req = nfs_page_find_request_locked(page); 131 req = nfs_page_find_request_locked(page);
132 spin_unlock(req_lock); 132 spin_unlock(&inode->i_lock);
133 return req; 133 return req;
134} 134}
135 135
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
191 } 191 }
192 /* Update file length */ 192 /* Update file length */
193 nfs_grow_file(page, offset, count); 193 nfs_grow_file(page, offset, count);
194 /* Set the PG_uptodate flag? */
195 nfs_mark_uptodate(page, offset, count);
196 nfs_unlock_request(req); 194 nfs_unlock_request(req);
197 return 0; 195 return 0;
198} 196}
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
253static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 251static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
254 struct page *page) 252 struct page *page)
255{ 253{
254 struct inode *inode = page->mapping->host;
255 struct nfs_inode *nfsi = NFS_I(inode);
256 struct nfs_page *req; 256 struct nfs_page *req;
257 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
258 spinlock_t *req_lock = &nfsi->req_lock;
259 int ret; 257 int ret;
260 258
261 spin_lock(req_lock); 259 spin_lock(&inode->i_lock);
262 for(;;) { 260 for(;;) {
263 req = nfs_page_find_request_locked(page); 261 req = nfs_page_find_request_locked(page);
264 if (req == NULL) { 262 if (req == NULL) {
265 spin_unlock(req_lock); 263 spin_unlock(&inode->i_lock);
266 return 1; 264 return 1;
267 } 265 }
268 if (nfs_lock_request_dontget(req)) 266 if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
272 * succeed provided that someone hasn't already marked the 270 * succeed provided that someone hasn't already marked the
273 * request as dirty (in which case we don't care). 271 * request as dirty (in which case we don't care).
274 */ 272 */
275 spin_unlock(req_lock); 273 spin_unlock(&inode->i_lock);
276 ret = nfs_wait_on_request(req); 274 ret = nfs_wait_on_request(req);
277 nfs_release_request(req); 275 nfs_release_request(req);
278 if (ret != 0) 276 if (ret != 0)
279 return ret; 277 return ret;
280 spin_lock(req_lock); 278 spin_lock(&inode->i_lock);
281 } 279 }
282 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { 280 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
283 /* This request is marked for commit */ 281 /* This request is marked for commit */
284 spin_unlock(req_lock); 282 spin_unlock(&inode->i_lock);
285 nfs_unlock_request(req); 283 nfs_unlock_request(req);
286 nfs_pageio_complete(pgio); 284 nfs_pageio_complete(pgio);
287 return 1; 285 return 1;
288 } 286 }
289 if (nfs_set_page_writeback(page) != 0) { 287 if (nfs_set_page_writeback(page) != 0) {
290 spin_unlock(req_lock); 288 spin_unlock(&inode->i_lock);
291 BUG(); 289 BUG();
292 } 290 }
293 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, 291 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
294 NFS_PAGE_TAG_WRITEBACK); 292 NFS_PAGE_TAG_LOCKED);
295 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); 293 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
296 spin_unlock(req_lock); 294 spin_unlock(&inode->i_lock);
297 nfs_pageio_add_request(pgio, req); 295 nfs_pageio_add_request(pgio, req);
298 return ret; 296 return ret;
299} 297}
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
400 if (PageDirty(req->wb_page)) 398 if (PageDirty(req->wb_page))
401 set_bit(PG_NEED_FLUSH, &req->wb_flags); 399 set_bit(PG_NEED_FLUSH, &req->wb_flags);
402 nfsi->npages++; 400 nfsi->npages++;
403 atomic_inc(&req->wb_count); 401 kref_get(&req->wb_kref);
404 return 0; 402 return 0;
405} 403}
406 404
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
409 */ 407 */
410static void nfs_inode_remove_request(struct nfs_page *req) 408static void nfs_inode_remove_request(struct nfs_page *req)
411{ 409{
412 struct inode *inode = req->wb_context->dentry->d_inode; 410 struct inode *inode = req->wb_context->path.dentry->d_inode;
413 struct nfs_inode *nfsi = NFS_I(inode); 411 struct nfs_inode *nfsi = NFS_I(inode);
414 412
415 BUG_ON (!NFS_WBACK_BUSY(req)); 413 BUG_ON (!NFS_WBACK_BUSY(req));
416 414
417 spin_lock(&nfsi->req_lock); 415 spin_lock(&inode->i_lock);
418 set_page_private(req->wb_page, 0); 416 set_page_private(req->wb_page, 0);
419 ClearPagePrivate(req->wb_page); 417 ClearPagePrivate(req->wb_page);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
422 __set_page_dirty_nobuffers(req->wb_page); 420 __set_page_dirty_nobuffers(req->wb_page);
423 nfsi->npages--; 421 nfsi->npages--;
424 if (!nfsi->npages) { 422 if (!nfsi->npages) {
425 spin_unlock(&nfsi->req_lock); 423 spin_unlock(&inode->i_lock);
426 nfs_end_data_update(inode); 424 nfs_end_data_update(inode);
427 iput(inode); 425 iput(inode);
428 } else 426 } else
429 spin_unlock(&nfsi->req_lock); 427 spin_unlock(&inode->i_lock);
430 nfs_clear_request(req); 428 nfs_clear_request(req);
431 nfs_release_request(req); 429 nfs_release_request(req);
432} 430}
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
457static void 455static void
458nfs_mark_request_commit(struct nfs_page *req) 456nfs_mark_request_commit(struct nfs_page *req)
459{ 457{
460 struct inode *inode = req->wb_context->dentry->d_inode; 458 struct inode *inode = req->wb_context->path.dentry->d_inode;
461 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
462 460
463 spin_lock(&nfsi->req_lock); 461 spin_lock(&inode->i_lock);
464 nfs_list_add_request(req, &nfsi->commit);
465 nfsi->ncommit++; 462 nfsi->ncommit++;
466 set_bit(PG_NEED_COMMIT, &(req)->wb_flags); 463 set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
467 spin_unlock(&nfsi->req_lock); 464 radix_tree_tag_set(&nfsi->nfs_page_tree,
465 req->wb_index,
466 NFS_PAGE_TAG_COMMIT);
467 spin_unlock(&inode->i_lock);
468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
470} 470}
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
526 idx_end = idx_start + npages - 1; 526 idx_end = idx_start + npages - 1;
527 527
528 next = idx_start; 528 next = idx_start;
529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { 529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
530 if (req->wb_index > idx_end) 530 if (req->wb_index > idx_end)
531 break; 531 break;
532 532
533 next = req->wb_index + 1; 533 next = req->wb_index + 1;
534 BUG_ON(!NFS_WBACK_BUSY(req)); 534 BUG_ON(!NFS_WBACK_BUSY(req));
535 535
536 atomic_inc(&req->wb_count); 536 kref_get(&req->wb_kref);
537 spin_unlock(&nfsi->req_lock); 537 spin_unlock(&inode->i_lock);
538 error = nfs_wait_on_request(req); 538 error = nfs_wait_on_request(req);
539 nfs_release_request(req); 539 nfs_release_request(req);
540 spin_lock(&nfsi->req_lock); 540 spin_lock(&inode->i_lock);
541 if (error < 0) 541 if (error < 0)
542 return error; 542 return error;
543 res++; 543 res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
577 int res = 0; 577 int res = 0;
578 578
579 if (nfsi->ncommit != 0) { 579 if (nfsi->ncommit != 0) {
580 res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); 580 res = nfs_scan_list(nfsi, dst, idx_start, npages,
581 NFS_PAGE_TAG_COMMIT);
581 nfsi->ncommit -= res; 582 nfsi->ncommit -= res;
582 if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
583 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
584 } 583 }
585 return res; 584 return res;
586} 585}
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
603{ 602{
604 struct address_space *mapping = page->mapping; 603 struct address_space *mapping = page->mapping;
605 struct inode *inode = mapping->host; 604 struct inode *inode = mapping->host;
606 struct nfs_inode *nfsi = NFS_I(inode);
607 struct nfs_page *req, *new = NULL; 605 struct nfs_page *req, *new = NULL;
608 pgoff_t rqend, end; 606 pgoff_t rqend, end;
609 607
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
613 /* Loop over all inode entries and see if we find 611 /* Loop over all inode entries and see if we find
614 * A request for the page we wish to update 612 * A request for the page we wish to update
615 */ 613 */
616 spin_lock(&nfsi->req_lock); 614 spin_lock(&inode->i_lock);
617 req = nfs_page_find_request_locked(page); 615 req = nfs_page_find_request_locked(page);
618 if (req) { 616 if (req) {
619 if (!nfs_lock_request_dontget(req)) { 617 if (!nfs_lock_request_dontget(req)) {
620 int error; 618 int error;
621 619
622 spin_unlock(&nfsi->req_lock); 620 spin_unlock(&inode->i_lock);
623 error = nfs_wait_on_request(req); 621 error = nfs_wait_on_request(req);
624 nfs_release_request(req); 622 nfs_release_request(req);
625 if (error < 0) { 623 if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
629 } 627 }
630 continue; 628 continue;
631 } 629 }
632 spin_unlock(&nfsi->req_lock); 630 spin_unlock(&inode->i_lock);
633 if (new) 631 if (new)
634 nfs_release_request(new); 632 nfs_release_request(new);
635 break; 633 break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
640 nfs_lock_request_dontget(new); 638 nfs_lock_request_dontget(new);
641 error = nfs_inode_add_request(inode, new); 639 error = nfs_inode_add_request(inode, new);
642 if (error) { 640 if (error) {
643 spin_unlock(&nfsi->req_lock); 641 spin_unlock(&inode->i_lock);
644 nfs_unlock_request(new); 642 nfs_unlock_request(new);
645 return ERR_PTR(error); 643 return ERR_PTR(error);
646 } 644 }
647 spin_unlock(&nfsi->req_lock); 645 spin_unlock(&inode->i_lock);
648 return new; 646 return new;
649 } 647 }
650 spin_unlock(&nfsi->req_lock); 648 spin_unlock(&inode->i_lock);
651 649
652 new = nfs_create_request(ctx, inode, page, offset, bytes); 650 new = nfs_create_request(ctx, inode, page, offset, bytes);
653 if (IS_ERR(new)) 651 if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
751static void nfs_writepage_release(struct nfs_page *req) 749static void nfs_writepage_release(struct nfs_page *req)
752{ 750{
753 751
754 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { 752 if (PageError(req->wb_page)) {
753 nfs_end_page_writeback(req->wb_page);
754 nfs_inode_remove_request(req);
755 } else if (!nfs_reschedule_unstable_write(req)) {
756 /* Set the PG_uptodate flag */
757 nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
755 nfs_end_page_writeback(req->wb_page); 758 nfs_end_page_writeback(req->wb_page);
756 nfs_inode_remove_request(req); 759 nfs_inode_remove_request(req);
757 } else 760 } else
758 nfs_end_page_writeback(req->wb_page); 761 nfs_end_page_writeback(req->wb_page);
759 nfs_clear_page_writeback(req); 762 nfs_clear_page_tag_locked(req);
760} 763}
761 764
762static inline int flush_task_priority(int how) 765static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
786 * NB: take care not to mess about with data->commit et al. */ 789 * NB: take care not to mess about with data->commit et al. */
787 790
788 data->req = req; 791 data->req = req;
789 data->inode = inode = req->wb_context->dentry->d_inode; 792 data->inode = inode = req->wb_context->path.dentry->d_inode;
790 data->cred = req->wb_context->cred; 793 data->cred = req->wb_context->cred;
791 794
792 data->args.fh = NFS_FH(inode); 795 data->args.fh = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
885 } 888 }
886 nfs_redirty_request(req); 889 nfs_redirty_request(req);
887 nfs_end_page_writeback(req->wb_page); 890 nfs_end_page_writeback(req->wb_page);
888 nfs_clear_page_writeback(req); 891 nfs_clear_page_tag_locked(req);
889 return -ENOMEM; 892 return -ENOMEM;
890} 893}
891 894
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
928 nfs_list_remove_request(req); 931 nfs_list_remove_request(req);
929 nfs_redirty_request(req); 932 nfs_redirty_request(req);
930 nfs_end_page_writeback(req->wb_page); 933 nfs_end_page_writeback(req->wb_page);
931 nfs_clear_page_writeback(req); 934 nfs_clear_page_tag_locked(req);
932 } 935 }
933 return -ENOMEM; 936 return -ENOMEM;
934} 937}
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
954 struct page *page = req->wb_page; 957 struct page *page = req->wb_page;
955 958
956 dprintk("NFS: write (%s/%Ld %d@%Ld)", 959 dprintk("NFS: write (%s/%Ld %d@%Ld)",
957 req->wb_context->dentry->d_inode->i_sb->s_id, 960 req->wb_context->path.dentry->d_inode->i_sb->s_id,
958 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 961 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
959 req->wb_bytes, 962 req->wb_bytes,
960 (long long)req_offset(req)); 963 (long long)req_offset(req));
961 964
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
970 } 973 }
971 974
972 if (nfs_write_need_commit(data)) { 975 if (nfs_write_need_commit(data)) {
973 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; 976 struct inode *inode = page->mapping->host;
974 977
975 spin_lock(req_lock); 978 spin_lock(&inode->i_lock);
976 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { 979 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
977 /* Do nothing we need to resend the writes */ 980 /* Do nothing we need to resend the writes */
978 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { 981 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
983 clear_bit(PG_NEED_COMMIT, &req->wb_flags); 986 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
984 dprintk(" server reboot detected\n"); 987 dprintk(" server reboot detected\n");
985 } 988 }
986 spin_unlock(req_lock); 989 spin_unlock(&inode->i_lock);
987 } else 990 } else
988 dprintk(" OK\n"); 991 dprintk(" OK\n");
989 992
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1020 page = req->wb_page; 1023 page = req->wb_page;
1021 1024
1022 dprintk("NFS: write (%s/%Ld %d@%Ld)", 1025 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1023 req->wb_context->dentry->d_inode->i_sb->s_id, 1026 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1024 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1027 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1025 req->wb_bytes, 1028 req->wb_bytes,
1026 (long long)req_offset(req)); 1029 (long long)req_offset(req));
1027 1030
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1039 dprintk(" marked for commit\n"); 1042 dprintk(" marked for commit\n");
1040 goto next; 1043 goto next;
1041 } 1044 }
1045 /* Set the PG_uptodate flag? */
1046 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
1042 dprintk(" OK\n"); 1047 dprintk(" OK\n");
1043remove_request: 1048remove_request:
1044 nfs_end_page_writeback(page); 1049 nfs_end_page_writeback(page);
1045 nfs_inode_remove_request(req); 1050 nfs_inode_remove_request(req);
1046 next: 1051 next:
1047 nfs_clear_page_writeback(req); 1052 nfs_clear_page_tag_locked(req);
1048 } 1053 }
1049} 1054}
1050 1055
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1157 1162
1158 list_splice_init(head, &data->pages); 1163 list_splice_init(head, &data->pages);
1159 first = nfs_list_entry(data->pages.next); 1164 first = nfs_list_entry(data->pages.next);
1160 inode = first->wb_context->dentry->d_inode; 1165 inode = first->wb_context->path.dentry->d_inode;
1161 1166
1162 data->inode = inode; 1167 data->inode = inode;
1163 data->cred = first->wb_context->cred; 1168 data->cred = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1207 nfs_list_remove_request(req); 1212 nfs_list_remove_request(req);
1208 nfs_mark_request_commit(req); 1213 nfs_mark_request_commit(req);
1209 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1214 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1210 nfs_clear_page_writeback(req); 1215 nfs_clear_page_tag_locked(req);
1211 } 1216 }
1212 return -ENOMEM; 1217 return -ENOMEM;
1213} 1218}
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1234 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1239 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1235 1240
1236 dprintk("NFS: commit (%s/%Ld %d@%Ld)", 1241 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1237 req->wb_context->dentry->d_inode->i_sb->s_id, 1242 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1238 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1243 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1239 req->wb_bytes, 1244 req->wb_bytes,
1240 (long long)req_offset(req)); 1245 (long long)req_offset(req));
1241 if (task->tk_status < 0) { 1246 if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1249 * returned by the server against all stored verfs. */ 1254 * returned by the server against all stored verfs. */
1250 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { 1255 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1251 /* We have a match */ 1256 /* We have a match */
1257 /* Set the PG_uptodate flag */
1258 nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
1259 req->wb_bytes);
1252 nfs_inode_remove_request(req); 1260 nfs_inode_remove_request(req);
1253 dprintk(" OK\n"); 1261 dprintk(" OK\n");
1254 goto next; 1262 goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1257 dprintk(" mismatch\n"); 1265 dprintk(" mismatch\n");
1258 nfs_redirty_request(req); 1266 nfs_redirty_request(req);
1259 next: 1267 next:
1260 nfs_clear_page_writeback(req); 1268 nfs_clear_page_tag_locked(req);
1261 } 1269 }
1262} 1270}
1263 1271
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
1268 1276
1269int nfs_commit_inode(struct inode *inode, int how) 1277int nfs_commit_inode(struct inode *inode, int how)
1270{ 1278{
1271 struct nfs_inode *nfsi = NFS_I(inode);
1272 LIST_HEAD(head); 1279 LIST_HEAD(head);
1273 int res; 1280 int res;
1274 1281
1275 spin_lock(&nfsi->req_lock); 1282 spin_lock(&inode->i_lock);
1276 res = nfs_scan_commit(inode, &head, 0, 0); 1283 res = nfs_scan_commit(inode, &head, 0, 0);
1277 spin_unlock(&nfsi->req_lock); 1284 spin_unlock(&inode->i_lock);
1278 if (res) { 1285 if (res) {
1279 int error = nfs_commit_list(inode, &head, how); 1286 int error = nfs_commit_list(inode, &head, how);
1280 if (error < 0) 1287 if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
1292long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1299long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1293{ 1300{
1294 struct inode *inode = mapping->host; 1301 struct inode *inode = mapping->host;
1295 struct nfs_inode *nfsi = NFS_I(inode);
1296 pgoff_t idx_start, idx_end; 1302 pgoff_t idx_start, idx_end;
1297 unsigned int npages = 0; 1303 unsigned int npages = 0;
1298 LIST_HEAD(head); 1304 LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1314 } 1320 }
1315 } 1321 }
1316 how &= ~FLUSH_NOCOMMIT; 1322 how &= ~FLUSH_NOCOMMIT;
1317 spin_lock(&nfsi->req_lock); 1323 spin_lock(&inode->i_lock);
1318 do { 1324 do {
1319 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1325 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1320 if (ret != 0) 1326 if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1325 if (pages == 0) 1331 if (pages == 0)
1326 break; 1332 break;
1327 if (how & FLUSH_INVALIDATE) { 1333 if (how & FLUSH_INVALIDATE) {
1328 spin_unlock(&nfsi->req_lock); 1334 spin_unlock(&inode->i_lock);
1329 nfs_cancel_commit_list(&head); 1335 nfs_cancel_commit_list(&head);
1330 ret = pages; 1336 ret = pages;
1331 spin_lock(&nfsi->req_lock); 1337 spin_lock(&inode->i_lock);
1332 continue; 1338 continue;
1333 } 1339 }
1334 pages += nfs_scan_commit(inode, &head, 0, 0); 1340 pages += nfs_scan_commit(inode, &head, 0, 0);
1335 spin_unlock(&nfsi->req_lock); 1341 spin_unlock(&inode->i_lock);
1336 ret = nfs_commit_list(inode, &head, how); 1342 ret = nfs_commit_list(inode, &head, how);
1337 spin_lock(&nfsi->req_lock); 1343 spin_lock(&inode->i_lock);
1344
1338 } while (ret >= 0); 1345 } while (ret >= 0);
1339 spin_unlock(&nfsi->req_lock); 1346 spin_unlock(&inode->i_lock);
1340 return ret; 1347 return ret;
1341} 1348}
1342 1349
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
1430{ 1437{
1431 struct address_space *mapping = page->mapping; 1438 struct address_space *mapping = page->mapping;
1432 struct inode *inode; 1439 struct inode *inode;
1433 spinlock_t *req_lock;
1434 struct nfs_page *req; 1440 struct nfs_page *req;
1435 int ret; 1441 int ret;
1436 1442
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
1439 inode = mapping->host; 1445 inode = mapping->host;
1440 if (!inode) 1446 if (!inode)
1441 goto out_raced; 1447 goto out_raced;
1442 req_lock = &NFS_I(inode)->req_lock; 1448 spin_lock(&inode->i_lock);
1443 spin_lock(req_lock);
1444 req = nfs_page_find_request_locked(page); 1449 req = nfs_page_find_request_locked(page);
1445 if (req != NULL) { 1450 if (req != NULL) {
1446 /* Mark any existing write requests for flushing */ 1451 /* Mark any existing write requests for flushing */
1447 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); 1452 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
1448 spin_unlock(req_lock); 1453 spin_unlock(&inode->i_lock);
1449 nfs_release_request(req); 1454 nfs_release_request(req);
1450 return ret; 1455 return ret;
1451 } 1456 }
1452 ret = __set_page_dirty_nobuffers(page); 1457 ret = __set_page_dirty_nobuffers(page);
1453 spin_unlock(req_lock); 1458 spin_unlock(&inode->i_lock);
1454 return ret; 1459 return ret;
1455out_raced: 1460out_raced:
1456 return !TestSetPageDirty(page); 1461 return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28b..5443c52b57aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
395 .rpc_argp = clp, 395 .rpc_argp = clp,
396 }; 396 };
397 char clientname[16];
398 int status; 397 int status;
399 398
400 if (atomic_read(&cb->cb_set)) 399 if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
417 memset(program->stats, 0, sizeof(cb->cb_stat)); 416 memset(program->stats, 0, sizeof(cb->cb_stat));
418 program->stats->program = program; 417 program->stats->program = program;
419 418
420 /* Just here to make some printk's more useful: */
421 snprintf(clientname, sizeof(clientname),
422 "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
423 args.servername = clientname;
424
425 /* Create RPC client */ 419 /* Create RPC client */
426 cb->cb_client = rpc_create(&args); 420 cb->cb_client = rpc_create(&args);
427 if (IS_ERR(cb->cb_client)) { 421 if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
429 goto out_err; 423 goto out_err;
430 } 424 }
431 425
432 /* Kick rpciod, put the call on the wire. */
433 if (rpciod_up() != 0)
434 goto out_clnt;
435
436 /* the task holds a reference to the nfs4_client struct */ 426 /* the task holds a reference to the nfs4_client struct */
437 atomic_inc(&clp->cl_count); 427 atomic_inc(&clp->cl_count);
438 428
439 msg.rpc_cred = nfsd4_lookupcred(clp,0); 429 msg.rpc_cred = nfsd4_lookupcred(clp,0);
440 if (IS_ERR(msg.rpc_cred)) 430 if (IS_ERR(msg.rpc_cred))
441 goto out_rpciod; 431 goto out_release_clp;
442 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); 432 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
443 put_rpccred(msg.rpc_cred); 433 put_rpccred(msg.rpc_cred);
444 434
445 if (status != 0) { 435 if (status != 0) {
446 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); 436 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
447 goto out_rpciod; 437 goto out_release_clp;
448 } 438 }
449 return; 439 return;
450 440
451out_rpciod: 441out_release_clp:
452 atomic_dec(&clp->cl_count); 442 atomic_dec(&clp->cl_count);
453 rpciod_down();
454out_clnt:
455 rpc_shutdown_client(cb->cb_client); 443 rpc_shutdown_client(cb->cb_client);
456out_err: 444out_err:
457 cb->cb_client = NULL; 445 cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab1..8c52913d7cb6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
378 if (clnt) { 378 if (clnt) {
379 clp->cl_callback.cb_client = NULL; 379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt); 380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 } 381 }
383} 382}
384 383
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8604e35bd48e..945b1cedde2b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -879,6 +879,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
879 .u.data = rqstp, 879 .u.data = rqstp,
880 }; 880 };
881 881
882 rqstp->rq_resused = 1;
882 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); 883 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
883 } else { 884 } else {
884 oldfs = get_fs(); 885 oldfs = get_fs();
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 2b205f5d5790..e9e042b93dbf 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
74#define define_mask(_name) { \ 74#define define_mask(_name) { \
75 .attr = { \ 75 .attr = { \
76 .name = #_name, \ 76 .name = #_name, \
77 .owner = THIS_MODULE, \
78 .mode = S_IRUGO | S_IWUSR, \ 77 .mode = S_IRUGO | S_IWUSR, \
79 }, \ 78 }, \
80 .mask = ML_##_name, \ 79 .mask = ML_##_name, \
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f3553..98e0b85a9bb2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
397 static struct attribute addpartattr = { 397 static struct attribute addpartattr = {
398 .name = "whole_disk", 398 .name = "whole_disk",
399 .mode = S_IRUSR | S_IRGRP | S_IROTH, 399 .mode = S_IRUSR | S_IRGRP | S_IROTH,
400 .owner = THIS_MODULE,
401 }; 400 };
402 401
403 sysfs_create_file(&p->kobj, &addpartattr); 402 sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/splice.c b/fs/splice.c
index ed2ce995475c..6c9828651e6f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31#include <linux/security.h>
31 32
32/* 33/*
33 * Attempt to steal a page from a pipe buffer. This should perhaps go into 34 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -491,7 +492,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
491 492
492 ret = 0; 493 ret = 0;
493 spliced = 0; 494 spliced = 0;
494 while (len) { 495 while (len && !spliced) {
495 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 496 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
496 497
497 if (ret < 0) 498 if (ret < 0)
@@ -961,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
961 if (unlikely(ret < 0)) 962 if (unlikely(ret < 0))
962 return ret; 963 return ret;
963 964
965 ret = security_file_permission(out, MAY_WRITE);
966 if (unlikely(ret < 0))
967 return ret;
968
964 return out->f_op->splice_write(pipe, out, ppos, len, flags); 969 return out->f_op->splice_write(pipe, out, ppos, len, flags);
965} 970}
966 971
@@ -983,6 +988,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
983 if (unlikely(ret < 0)) 988 if (unlikely(ret < 0))
984 return ret; 989 return ret;
985 990
991 ret = security_file_permission(in, MAY_READ);
992 if (unlikely(ret < 0))
993 return ret;
994
986 return in->f_op->splice_read(in, ppos, pipe, len, flags); 995 return in->f_op->splice_read(in, ppos, pipe, len, flags);
987} 996}
988 997
@@ -1051,15 +1060,10 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1051 sd->flags &= ~SPLICE_F_NONBLOCK; 1060 sd->flags &= ~SPLICE_F_NONBLOCK;
1052 1061
1053 while (len) { 1062 while (len) {
1054 size_t read_len, max_read_len; 1063 size_t read_len;
1055
1056 /*
1057 * Do at most PIPE_BUFFERS pages worth of transfer:
1058 */
1059 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
1060 1064
1061 ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags); 1065 ret = do_splice_to(in, &sd->pos, pipe, len, flags);
1062 if (unlikely(ret < 0)) 1066 if (unlikely(ret <= 0))
1063 goto out_release; 1067 goto out_release;
1064 1068
1065 read_len = ret; 1069 read_len = ret;
@@ -1071,26 +1075,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1071 * could get stuck data in the internal pipe: 1075 * could get stuck data in the internal pipe:
1072 */ 1076 */
1073 ret = actor(pipe, sd); 1077 ret = actor(pipe, sd);
1074 if (unlikely(ret < 0)) 1078 if (unlikely(ret <= 0))
1075 goto out_release; 1079 goto out_release;
1076 1080
1077 bytes += ret; 1081 bytes += ret;
1078 len -= ret; 1082 len -= ret;
1079 1083
1080 /* 1084 if (ret < read_len)
1081 * In nonblocking mode, if we got back a short read then 1085 goto out_release;
1082 * that was due to either an IO error or due to the
1083 * pagecache entry not being there. In the IO error case
1084 * the _next_ splice attempt will produce a clean IO error
1085 * return value (not a short read), so in both cases it's
1086 * correct to break out of the loop here:
1087 */
1088 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
1089 break;
1090 } 1086 }
1091 1087
1092 pipe->nrbufs = pipe->curbuf = 0; 1088 pipe->nrbufs = pipe->curbuf = 0;
1093
1094 return bytes; 1089 return bytes;
1095 1090
1096out_release: 1091out_release:
@@ -1152,10 +1147,12 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1152 .pos = *ppos, 1147 .pos = *ppos,
1153 .u.file = out, 1148 .u.file = out,
1154 }; 1149 };
1155 size_t ret; 1150 long ret;
1156 1151
1157 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1152 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1158 *ppos = sd.pos; 1153 if (ret > 0)
1154 *ppos += ret;
1155
1159 return ret; 1156 return ret;
1160} 1157}
1161 1158
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db1..135353f8a296 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
20 20
21#include "sysfs.h" 21#include "sysfs.h"
22 22
23struct bin_buffer {
24 struct mutex mutex;
25 void *buffer;
26 int mmapped;
27};
28
23static int 29static int
24fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 30fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
25{ 31{
26 struct bin_attribute * attr = to_bin_attr(dentry); 32 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
27 struct kobject * kobj = to_kobj(dentry->d_parent); 33 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
34 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
35 int rc;
36
37 /* need attr_sd for attr, its parent for kobj */
38 if (!sysfs_get_active_two(attr_sd))
39 return -ENODEV;
28 40
29 if (!attr->read) 41 rc = -EIO;
30 return -EIO; 42 if (attr->read)
43 rc = attr->read(kobj, attr, buffer, off, count);
31 44
32 return attr->read(kobj, buffer, off, count); 45 sysfs_put_active_two(attr_sd);
46
47 return rc;
33} 48}
34 49
35static ssize_t 50static ssize_t
36read(struct file * file, char __user * userbuf, size_t count, loff_t * off) 51read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
37{ 52{
38 char *buffer = file->private_data; 53 struct bin_buffer *bb = file->private_data;
39 struct dentry *dentry = file->f_path.dentry; 54 struct dentry *dentry = file->f_path.dentry;
40 int size = dentry->d_inode->i_size; 55 int size = dentry->d_inode->i_size;
41 loff_t offs = *off; 56 loff_t offs = *off;
42 int ret; 57 int count = min_t(size_t, bytes, PAGE_SIZE);
43
44 if (count > PAGE_SIZE)
45 count = PAGE_SIZE;
46 58
47 if (size) { 59 if (size) {
48 if (offs > size) 60 if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
51 count = size - offs; 63 count = size - offs;
52 } 64 }
53 65
54 ret = fill_read(dentry, buffer, offs, count); 66 mutex_lock(&bb->mutex);
55 if (ret < 0) 67
56 return ret; 68 count = fill_read(dentry, bb->buffer, offs, count);
57 count = ret; 69 if (count < 0)
70 goto out_unlock;
58 71
59 if (copy_to_user(userbuf, buffer, count)) 72 if (copy_to_user(userbuf, bb->buffer, count)) {
60 return -EFAULT; 73 count = -EFAULT;
74 goto out_unlock;
75 }
61 76
62 pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); 77 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
63 78
64 *off = offs + count; 79 *off = offs + count;
65 80
81 out_unlock:
82 mutex_unlock(&bb->mutex);
66 return count; 83 return count;
67} 84}
68 85
69static int 86static int
70flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 87flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
71{ 88{
72 struct bin_attribute *attr = to_bin_attr(dentry); 89 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
73 struct kobject *kobj = to_kobj(dentry->d_parent); 90 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 int rc;
93
94 /* need attr_sd for attr, its parent for kobj */
95 if (!sysfs_get_active_two(attr_sd))
96 return -ENODEV;
97
98 rc = -EIO;
99 if (attr->write)
100 rc = attr->write(kobj, attr, buffer, offset, count);
74 101
75 if (!attr->write) 102 sysfs_put_active_two(attr_sd);
76 return -EIO;
77 103
78 return attr->write(kobj, buffer, offset, count); 104 return rc;
79} 105}
80 106
81static ssize_t write(struct file * file, const char __user * userbuf, 107static ssize_t write(struct file *file, const char __user *userbuf,
82 size_t count, loff_t * off) 108 size_t bytes, loff_t *off)
83{ 109{
84 char *buffer = file->private_data; 110 struct bin_buffer *bb = file->private_data;
85 struct dentry *dentry = file->f_path.dentry; 111 struct dentry *dentry = file->f_path.dentry;
86 int size = dentry->d_inode->i_size; 112 int size = dentry->d_inode->i_size;
87 loff_t offs = *off; 113 loff_t offs = *off;
114 int count = min_t(size_t, bytes, PAGE_SIZE);
88 115
89 if (count > PAGE_SIZE)
90 count = PAGE_SIZE;
91 if (size) { 116 if (size) {
92 if (offs > size) 117 if (offs > size)
93 return 0; 118 return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
95 count = size - offs; 120 count = size - offs;
96 } 121 }
97 122
98 if (copy_from_user(buffer, userbuf, count)) 123 mutex_lock(&bb->mutex);
99 return -EFAULT;
100 124
101 count = flush_write(dentry, buffer, offs, count); 125 if (copy_from_user(bb->buffer, userbuf, count)) {
126 count = -EFAULT;
127 goto out_unlock;
128 }
129
130 count = flush_write(dentry, bb->buffer, offs, count);
102 if (count > 0) 131 if (count > 0)
103 *off = offs + count; 132 *off = offs + count;
133
134 out_unlock:
135 mutex_unlock(&bb->mutex);
104 return count; 136 return count;
105} 137}
106 138
107static int mmap(struct file *file, struct vm_area_struct *vma) 139static int mmap(struct file *file, struct vm_area_struct *vma)
108{ 140{
109 struct dentry *dentry = file->f_path.dentry; 141 struct bin_buffer *bb = file->private_data;
110 struct bin_attribute *attr = to_bin_attr(dentry); 142 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
111 struct kobject *kobj = to_kobj(dentry->d_parent); 143 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
144 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
145 int rc;
146
147 mutex_lock(&bb->mutex);
148
149 /* need attr_sd for attr, its parent for kobj */
150 if (!sysfs_get_active_two(attr_sd))
151 return -ENODEV;
112 152
113 if (!attr->mmap) 153 rc = -EINVAL;
114 return -EINVAL; 154 if (attr->mmap)
155 rc = attr->mmap(kobj, attr, vma);
115 156
116 return attr->mmap(kobj, attr, vma); 157 if (rc == 0 && !bb->mmapped)
158 bb->mmapped = 1;
159 else
160 sysfs_put_active_two(attr_sd);
161
162 mutex_unlock(&bb->mutex);
163
164 return rc;
117} 165}
118 166
119static int open(struct inode * inode, struct file * file) 167static int open(struct inode * inode, struct file * file)
120{ 168{
121 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 169 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
122 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 170 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
123 int error = -EINVAL; 171 struct bin_buffer *bb = NULL;
124 172 int error;
125 if (!kobj || !attr)
126 goto Done;
127 173
128 /* Grab the module reference for this attribute if we have one */ 174 /* need attr_sd for attr */
129 error = -ENODEV; 175 if (!sysfs_get_active(attr_sd))
130 if (!try_module_get(attr->attr.owner)) 176 return -ENODEV;
131 goto Done;
132 177
133 error = -EACCES; 178 error = -EACCES;
134 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) 179 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
135 goto Error; 180 goto err_out;
136 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) 181 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
137 goto Error; 182 goto err_out;
138 183
139 error = -ENOMEM; 184 error = -ENOMEM;
140 file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); 185 bb = kzalloc(sizeof(*bb), GFP_KERNEL);
141 if (!file->private_data) 186 if (!bb)
142 goto Error; 187 goto err_out;
143 188
144 error = 0; 189 bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
145 goto Done; 190 if (!bb->buffer)
146 191 goto err_out;
147 Error: 192
148 module_put(attr->attr.owner); 193 mutex_init(&bb->mutex);
149 Done: 194 file->private_data = bb;
150 if (error) 195
151 kobject_put(kobj); 196 /* open succeeded, put active reference and pin attr_sd */
197 sysfs_put_active(attr_sd);
198 sysfs_get(attr_sd);
199 return 0;
200
201 err_out:
202 sysfs_put_active(attr_sd);
203 kfree(bb);
152 return error; 204 return error;
153} 205}
154 206
155static int release(struct inode * inode, struct file * file) 207static int release(struct inode * inode, struct file * file)
156{ 208{
157 struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); 209 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
158 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 210 struct bin_buffer *bb = file->private_data;
159 u8 * buffer = file->private_data; 211
160 212 if (bb->mmapped)
161 kobject_put(kobj); 213 sysfs_put_active_two(attr_sd);
162 module_put(attr->attr.owner); 214 sysfs_put(attr_sd);
163 kfree(buffer); 215 kfree(bb->buffer);
216 kfree(bb);
164 return 0; 217 return 0;
165} 218}
166 219
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
181 234
182int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 235int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
183{ 236{
184 BUG_ON(!kobj || !kobj->dentry || !attr); 237 BUG_ON(!kobj || !kobj->sd || !attr);
185 238
186 return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 239 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
187} 240}
188 241
189 242
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
195 248
196void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 249void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
197{ 250{
198 if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { 251 if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
199 printk(KERN_ERR "%s: " 252 printk(KERN_ERR "%s: "
200 "bad dentry or inode or no such file: \"%s\"\n", 253 "bad dentry or inode or no such file: \"%s\"\n",
201 __FUNCTION__, attr->attr.name); 254 __FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index c4342a019972..aee966c44aac 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,21 +9,337 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h> 11#include <linux/namei.h>
12#include <linux/idr.h>
13#include <linux/completion.h>
12#include <asm/semaphore.h> 14#include <asm/semaphore.h>
13#include "sysfs.h" 15#include "sysfs.h"
14 16
15DECLARE_RWSEM(sysfs_rename_sem); 17DEFINE_MUTEX(sysfs_mutex);
16spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED; 18spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19
20static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21static DEFINE_IDA(sysfs_ino_ida);
22
23/**
24 * sysfs_link_sibling - link sysfs_dirent into sibling list
25 * @sd: sysfs_dirent of interest
26 *
27 * Link @sd into its sibling list which starts from
28 * sd->s_parent->s_children.
29 *
30 * Locking:
31 * mutex_lock(sysfs_mutex)
32 */
33void sysfs_link_sibling(struct sysfs_dirent *sd)
34{
35 struct sysfs_dirent *parent_sd = sd->s_parent;
36
37 BUG_ON(sd->s_sibling);
38 sd->s_sibling = parent_sd->s_children;
39 parent_sd->s_children = sd;
40}
41
42/**
43 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44 * @sd: sysfs_dirent of interest
45 *
46 * Unlink @sd from its sibling list which starts from
47 * sd->s_parent->s_children.
48 *
49 * Locking:
50 * mutex_lock(sysfs_mutex)
51 */
52void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53{
54 struct sysfs_dirent **pos;
55
56 for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 if (*pos == sd) {
58 *pos = sd->s_sibling;
59 sd->s_sibling = NULL;
60 break;
61 }
62 }
63}
64
65/**
66 * sysfs_get_dentry - get dentry for the given sysfs_dirent
67 * @sd: sysfs_dirent of interest
68 *
69 * Get dentry for @sd. Dentry is looked up if currently not
70 * present. This function climbs sysfs_dirent tree till it
71 * reaches a sysfs_dirent with valid dentry attached and descends
72 * down from there looking up dentry for each step.
73 *
74 * LOCKING:
75 * Kernel thread context (may sleep)
76 *
77 * RETURNS:
78 * Pointer to found dentry on success, ERR_PTR() value on error.
79 */
80struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81{
82 struct sysfs_dirent *cur;
83 struct dentry *parent_dentry, *dentry;
84 int i, depth;
85
86 /* Find the first parent which has valid s_dentry and get the
87 * dentry.
88 */
89 mutex_lock(&sysfs_mutex);
90 restart0:
91 spin_lock(&sysfs_assoc_lock);
92 restart1:
93 spin_lock(&dcache_lock);
94
95 dentry = NULL;
96 depth = 0;
97 cur = sd;
98 while (!cur->s_dentry || !cur->s_dentry->d_inode) {
99 if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 dentry = ERR_PTR(-ENOENT);
101 depth = 0;
102 break;
103 }
104 cur = cur->s_parent;
105 depth++;
106 }
107 if (!IS_ERR(dentry))
108 dentry = dget_locked(cur->s_dentry);
109
110 spin_unlock(&dcache_lock);
111 spin_unlock(&sysfs_assoc_lock);
112
113 /* from the found dentry, look up depth times */
114 while (depth--) {
115 /* find and get depth'th ancestor */
116 for (cur = sd, i = 0; cur && i < depth; i++)
117 cur = cur->s_parent;
118
119 /* This can happen if tree structure was modified due
120 * to move/rename. Restart.
121 */
122 if (i != depth) {
123 dput(dentry);
124 goto restart0;
125 }
126
127 sysfs_get(cur);
128
129 mutex_unlock(&sysfs_mutex);
130
131 /* look it up */
132 parent_dentry = dentry;
133 dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
134 strlen(cur->s_name));
135 dput(parent_dentry);
136
137 if (IS_ERR(dentry)) {
138 sysfs_put(cur);
139 return dentry;
140 }
141
142 mutex_lock(&sysfs_mutex);
143 spin_lock(&sysfs_assoc_lock);
144
145 /* This, again, can happen if tree structure has
146 * changed and we looked up the wrong thing. Restart.
147 */
148 if (cur->s_dentry != dentry) {
149 dput(dentry);
150 sysfs_put(cur);
151 goto restart1;
152 }
153
154 spin_unlock(&sysfs_assoc_lock);
155
156 sysfs_put(cur);
157 }
158
159 mutex_unlock(&sysfs_mutex);
160 return dentry;
161}
162
163/**
164 * sysfs_get_active - get an active reference to sysfs_dirent
165 * @sd: sysfs_dirent to get an active reference to
166 *
167 * Get an active reference of @sd. This function is noop if @sd
168 * is NULL.
169 *
170 * RETURNS:
171 * Pointer to @sd on success, NULL on failure.
172 */
173struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174{
175 if (unlikely(!sd))
176 return NULL;
177
178 while (1) {
179 int v, t;
180
181 v = atomic_read(&sd->s_active);
182 if (unlikely(v < 0))
183 return NULL;
184
185 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 if (likely(t == v))
187 return sd;
188 if (t < 0)
189 return NULL;
190
191 cpu_relax();
192 }
193}
194
195/**
196 * sysfs_put_active - put an active reference to sysfs_dirent
197 * @sd: sysfs_dirent to put an active reference to
198 *
199 * Put an active reference to @sd. This function is noop if @sd
200 * is NULL.
201 */
202void sysfs_put_active(struct sysfs_dirent *sd)
203{
204 struct completion *cmpl;
205 int v;
206
207 if (unlikely(!sd))
208 return;
209
210 v = atomic_dec_return(&sd->s_active);
211 if (likely(v != SD_DEACTIVATED_BIAS))
212 return;
213
214 /* atomic_dec_return() is a mb(), we'll always see the updated
215 * sd->s_sibling.
216 */
217 cmpl = (void *)sd->s_sibling;
218 complete(cmpl);
219}
220
221/**
222 * sysfs_get_active_two - get active references to sysfs_dirent and parent
223 * @sd: sysfs_dirent of interest
224 *
225 * Get active reference to @sd and its parent. Parent's active
226 * reference is grabbed first. This function is noop if @sd is
227 * NULL.
228 *
229 * RETURNS:
230 * Pointer to @sd on success, NULL on failure.
231 */
232struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233{
234 if (sd) {
235 if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 return NULL;
237 if (unlikely(!sysfs_get_active(sd))) {
238 sysfs_put_active(sd->s_parent);
239 return NULL;
240 }
241 }
242 return sd;
243}
244
245/**
246 * sysfs_put_active_two - put active references to sysfs_dirent and parent
247 * @sd: sysfs_dirent of interest
248 *
249 * Put active references to @sd and its parent. This function is
250 * noop if @sd is NULL.
251 */
252void sysfs_put_active_two(struct sysfs_dirent *sd)
253{
254 if (sd) {
255 sysfs_put_active(sd);
256 sysfs_put_active(sd->s_parent);
257 }
258}
259
260/**
261 * sysfs_deactivate - deactivate sysfs_dirent
262 * @sd: sysfs_dirent to deactivate
263 *
264 * Deny new active references and drain existing ones.
265 */
266static void sysfs_deactivate(struct sysfs_dirent *sd)
267{
268 DECLARE_COMPLETION_ONSTACK(wait);
269 int v;
270
271 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
272 sd->s_sibling = (void *)&wait;
273
274 /* atomic_add_return() is a mb(), put_active() will always see
275 * the updated sd->s_sibling.
276 */
277 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
278
279 if (v != SD_DEACTIVATED_BIAS)
280 wait_for_completion(&wait);
281
282 sd->s_sibling = NULL;
283}
284
285static int sysfs_alloc_ino(ino_t *pino)
286{
287 int ino, rc;
288
289 retry:
290 spin_lock(&sysfs_ino_lock);
291 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 spin_unlock(&sysfs_ino_lock);
293
294 if (rc == -EAGAIN) {
295 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 goto retry;
297 rc = -ENOMEM;
298 }
299
300 *pino = ino;
301 return rc;
302}
303
304static void sysfs_free_ino(ino_t ino)
305{
306 spin_lock(&sysfs_ino_lock);
307 ida_remove(&sysfs_ino_ida, ino);
308 spin_unlock(&sysfs_ino_lock);
309}
310
311void release_sysfs_dirent(struct sysfs_dirent * sd)
312{
313 struct sysfs_dirent *parent_sd;
314
315 repeat:
316 /* Moving/renaming is always done while holding reference.
317 * sd->s_parent won't change beneath us.
318 */
319 parent_sd = sd->s_parent;
320
321 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 sysfs_put(sd->s_elem.symlink.target_sd);
323 if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 kfree(sd->s_name);
325 kfree(sd->s_iattr);
326 sysfs_free_ino(sd->s_ino);
327 kmem_cache_free(sysfs_dir_cachep, sd);
328
329 sd = parent_sd;
330 if (sd && atomic_dec_and_test(&sd->s_count))
331 goto repeat;
332}
17 333
18static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) 334static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
19{ 335{
20 struct sysfs_dirent * sd = dentry->d_fsdata; 336 struct sysfs_dirent * sd = dentry->d_fsdata;
21 337
22 if (sd) { 338 if (sd) {
23 /* sd->s_dentry is protected with sysfs_lock. This 339 /* sd->s_dentry is protected with sysfs_assoc_lock.
24 * allows sysfs_drop_dentry() to dereference it. 340 * This allows sysfs_drop_dentry() to dereference it.
25 */ 341 */
26 spin_lock(&sysfs_lock); 342 spin_lock(&sysfs_assoc_lock);
27 343
28 /* The dentry might have been deleted or another 344 /* The dentry might have been deleted or another
29 * lookup could have happened updating sd->s_dentry to 345 * lookup could have happened updating sd->s_dentry to
@@ -32,7 +348,7 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
32 */ 348 */
33 if (sd->s_dentry == dentry) 349 if (sd->s_dentry == dentry)
34 sd->s_dentry = NULL; 350 sd->s_dentry = NULL;
35 spin_unlock(&sysfs_lock); 351 spin_unlock(&sysfs_assoc_lock);
36 sysfs_put(sd); 352 sysfs_put(sd);
37 } 353 }
38 iput(inode); 354 iput(inode);
@@ -42,260 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
42 .d_iput = sysfs_d_iput, 358 .d_iput = sysfs_d_iput,
43}; 359};
44 360
45static unsigned int sysfs_inode_counter; 361struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
46ino_t sysfs_get_inum(void)
47{ 362{
48 if (unlikely(sysfs_inode_counter < 3)) 363 char *dup_name = NULL;
49 sysfs_inode_counter = 3; 364 struct sysfs_dirent *sd = NULL;
50 return sysfs_inode_counter++;
51}
52 365
53/* 366 if (type & SYSFS_COPY_NAME) {
54 * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent 367 name = dup_name = kstrdup(name, GFP_KERNEL);
55 */ 368 if (!name)
56static struct sysfs_dirent * __sysfs_new_dirent(void * element) 369 goto err_out;
57{ 370 }
58 struct sysfs_dirent * sd;
59 371
60 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 372 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
61 if (!sd) 373 if (!sd)
62 return NULL; 374 goto err_out;
375
376 if (sysfs_alloc_ino(&sd->s_ino))
377 goto err_out;
63 378
64 sd->s_ino = sysfs_get_inum();
65 atomic_set(&sd->s_count, 1); 379 atomic_set(&sd->s_count, 1);
380 atomic_set(&sd->s_active, 0);
66 atomic_set(&sd->s_event, 1); 381 atomic_set(&sd->s_event, 1);
67 INIT_LIST_HEAD(&sd->s_children); 382
68 INIT_LIST_HEAD(&sd->s_sibling); 383 sd->s_name = name;
69 sd->s_element = element; 384 sd->s_mode = mode;
385 sd->s_flags = type;
70 386
71 return sd; 387 return sd;
388
389 err_out:
390 kfree(dup_name);
391 kmem_cache_free(sysfs_dir_cachep, sd);
392 return NULL;
72} 393}
73 394
74static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, 395/**
75 struct sysfs_dirent *sd) 396 * sysfs_attach_dentry - associate sysfs_dirent with dentry
397 * @sd: target sysfs_dirent
398 * @dentry: dentry to associate
399 *
400 * Associate @sd with @dentry. This is protected by
401 * sysfs_assoc_lock to avoid race with sysfs_d_iput().
402 *
403 * LOCKING:
404 * mutex_lock(sysfs_mutex)
405 */
406static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
76{ 407{
77 if (sd) 408 dentry->d_op = &sysfs_dentry_ops;
78 list_add(&sd->s_sibling, &parent_sd->s_children); 409 dentry->d_fsdata = sysfs_get(sd);
410
411 /* protect sd->s_dentry against sysfs_d_iput */
412 spin_lock(&sysfs_assoc_lock);
413 sd->s_dentry = dentry;
414 spin_unlock(&sysfs_assoc_lock);
415
416 d_rehash(dentry);
79} 417}
80 418
81static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, 419static int sysfs_ilookup_test(struct inode *inode, void *arg)
82 void * element)
83{ 420{
84 struct sysfs_dirent *sd; 421 struct sysfs_dirent *sd = arg;
85 sd = __sysfs_new_dirent(element); 422 return inode->i_ino == sd->s_ino;
86 __sysfs_list_dirent(parent_sd, sd);
87 return sd;
88} 423}
89 424
90/* 425/**
426 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
427 * @acxt: pointer to sysfs_addrm_cxt to be used
428 * @parent_sd: parent sysfs_dirent
91 * 429 *
92 * Return -EEXIST if there is already a sysfs element with the same name for 430 * This function is called when the caller is about to add or
93 * the same parent. 431 * remove sysfs_dirent under @parent_sd. This function acquires
432 * sysfs_mutex, grabs inode for @parent_sd if available and lock
433 * i_mutex of it. @acxt is used to keep and pass context to
434 * other addrm functions.
94 * 435 *
95 * called with parent inode's i_mutex held 436 * LOCKING:
437 * Kernel thread context (may sleep). sysfs_mutex is locked on
438 * return. i_mutex of parent inode is locked on return if
439 * available.
96 */ 440 */
97int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, 441void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
98 const unsigned char *new) 442 struct sysfs_dirent *parent_sd)
99{ 443{
100 struct sysfs_dirent * sd; 444 struct inode *inode;
101 445
102 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 446 memset(acxt, 0, sizeof(*acxt));
103 if (sd->s_element) { 447 acxt->parent_sd = parent_sd;
104 const unsigned char *existing = sysfs_get_name(sd);
105 if (strcmp(existing, new))
106 continue;
107 else
108 return -EEXIST;
109 }
110 }
111 448
112 return 0; 449 /* Lookup parent inode. inode initialization and I_NEW
450 * clearing are protected by sysfs_mutex. By grabbing it and
451 * looking up with _nowait variant, inode state can be
452 * determined reliably.
453 */
454 mutex_lock(&sysfs_mutex);
455
456 inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
457 parent_sd);
458
459 if (inode && !(inode->i_state & I_NEW)) {
460 /* parent inode available */
461 acxt->parent_inode = inode;
462
463 /* sysfs_mutex is below i_mutex in lock hierarchy.
464 * First, trylock i_mutex. If fails, unlock
465 * sysfs_mutex and lock them in order.
466 */
467 if (!mutex_trylock(&inode->i_mutex)) {
468 mutex_unlock(&sysfs_mutex);
469 mutex_lock(&inode->i_mutex);
470 mutex_lock(&sysfs_mutex);
471 }
472 } else
473 iput(inode);
113} 474}
114 475
476/**
477 * sysfs_add_one - add sysfs_dirent to parent
478 * @acxt: addrm context to use
479 * @sd: sysfs_dirent to be added
480 *
481 * Get @acxt->parent_sd and set sd->s_parent to it and increment
482 * nlink of parent inode if @sd is a directory. @sd is NOT
483 * linked into the children list of the parent. The caller
484 * should invoke sysfs_link_sibling() after this function
485 * completes if @sd needs to be on the children list.
486 *
487 * This function should be called between calls to
488 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
489 * passed the same @acxt as passed to sysfs_addrm_start().
490 *
491 * LOCKING:
492 * Determined by sysfs_addrm_start().
493 */
494void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
495{
496 sd->s_parent = sysfs_get(acxt->parent_sd);
497
498 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
499 inc_nlink(acxt->parent_inode);
500
501 acxt->cnt++;
502}
115 503
116static struct sysfs_dirent * 504/**
117__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) 505 * sysfs_remove_one - remove sysfs_dirent from parent
506 * @acxt: addrm context to use
120 507 * @sd: sysfs_dirent to be removed
508 *
509 * Mark @sd removed and drop nlink of parent inode if @sd is a
510 * directory. @sd is NOT unlinked from the children list of the
94 * 511 * parent. The caller is responsible for removing @sd from the
512 * children list before calling this function.
513 *
514 * This function should be called between calls to
515 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
516 * passed the same @acxt as passed to sysfs_addrm_start().
517 *
518 * LOCKING:
519 * Determined by sysfs_addrm_start().
520 */
521void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
118{ 522{
119 struct sysfs_dirent * sd; 523 BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
120 524
121 sd = __sysfs_new_dirent(element); 525 sd->s_flags |= SYSFS_FLAG_REMOVED;
122 if (!sd) 526 sd->s_sibling = acxt->removed;
123 goto out; 527 acxt->removed = sd;
124 528
125 sd->s_mode = mode; 529 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
126 sd->s_type = type; 530 drop_nlink(acxt->parent_inode);
127 sd->s_dentry = dentry;
128 if (dentry) {
129 dentry->d_fsdata = sysfs_get(sd);
130 dentry->d_op = &sysfs_dentry_ops;
131 }
132 531
133out: 532 acxt->cnt++;
134 return sd;
135} 533}
136 534
137int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, 535/**
138 void * element, umode_t mode, int type) 536 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
537 * @sd: target sysfs_dirent
538 *
539 * Drop dentry for @sd. @sd must have been unlinked from its
540 * parent on entry to this function such that it can't be looked
541 * up anymore.
542 *
543 * @sd->s_dentry which is protected with sysfs_assoc_lock points
544 * to the currently associated dentry but we're not holding a
545 * reference to it and racing with dput(). Grab dcache_lock and
546 * verify dentry before dropping it. If @sd->s_dentry is NULL or
547 * dput() beats us, no need to bother.
548 */
549static void sysfs_drop_dentry(struct sysfs_dirent *sd)
139{ 550{
140 struct sysfs_dirent *sd; 551 struct dentry *dentry = NULL;
552 struct inode *inode;
553
554 /* We're not holding a reference to ->s_dentry dentry but the
555 * field will stay valid as long as sysfs_assoc_lock is held.
556 */
557 spin_lock(&sysfs_assoc_lock);
558 spin_lock(&dcache_lock);
559
560 /* drop dentry if it's there and dput() didn't kill it yet */
561 if (sd->s_dentry && sd->s_dentry->d_inode) {
562 dentry = dget_locked(sd->s_dentry);
563 spin_lock(&dentry->d_lock);
564 __d_drop(dentry);
565 spin_unlock(&dentry->d_lock);
566 }
141 567
142 sd = __sysfs_make_dirent(dentry, element, mode, type); 568 spin_unlock(&dcache_lock);
143 __sysfs_list_dirent(parent_sd, sd); 569 spin_unlock(&sysfs_assoc_lock);
144 570
145 return sd ? 0 : -ENOMEM; 571 /* dentries for shadowed inodes are pinned, unpin */
572 if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
573 dput(dentry);
574 dput(dentry);
575
576 /* adjust nlink and update timestamp */
577 inode = ilookup(sysfs_sb, sd->s_ino);
578 if (inode) {
579 mutex_lock(&inode->i_mutex);
580
581 inode->i_ctime = CURRENT_TIME;
582 drop_nlink(inode);
583 if (sysfs_type(sd) == SYSFS_DIR)
584 drop_nlink(inode);
585
586 mutex_unlock(&inode->i_mutex);
587 iput(inode);
588 }
146} 589}
147 590
148static int init_dir(struct inode * inode) 591/**
592 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
593 * @acxt: addrm context to finish up
594 *
595 * Finish up sysfs_dirent add/remove. Resources acquired by
596 * sysfs_addrm_start() are released and removed sysfs_dirents are
597 * cleaned up. Timestamps on the parent inode are updated.
598 *
599 * LOCKING:
600 * All mutexes acquired by sysfs_addrm_start() are released.
601 *
602 * RETURNS:
603 * Number of added/removed sysfs_dirents since sysfs_addrm_start().
604 */
605int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
149{ 606{
150 inode->i_op = &sysfs_dir_inode_operations; 607 /* release resources acquired by sysfs_addrm_start() */
151 inode->i_fop = &sysfs_dir_operations; 608 mutex_unlock(&sysfs_mutex);
609 if (acxt->parent_inode) {
610 struct inode *inode = acxt->parent_inode;
152 611
153 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 612 /* if added/removed, update timestamps on the parent */
154 inc_nlink(inode); 613 if (acxt->cnt)
155 return 0; 614 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
615
616 mutex_unlock(&inode->i_mutex);
617 iput(inode);
618 }
619
620 /* kill removed sysfs_dirents */
621 while (acxt->removed) {
622 struct sysfs_dirent *sd = acxt->removed;
623
624 acxt->removed = sd->s_sibling;
625 sd->s_sibling = NULL;
626
627 sysfs_drop_dentry(sd);
628 sysfs_deactivate(sd);
629 sysfs_put(sd);
630 }
631
632 return acxt->cnt;
156} 633}
157 634
158static int init_file(struct inode * inode) 635/**
636 * sysfs_find_dirent - find sysfs_dirent with the given name
637 * @parent_sd: sysfs_dirent to search under
638 * @name: name to look for
639 *
640 * Look for sysfs_dirent with name @name under @parent_sd.
641 *
642 * LOCKING:
643 * mutex_lock(sysfs_mutex)
644 *
645 * RETURNS:
646 * Pointer to sysfs_dirent if found, NULL if not.
647 */
648struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
649 const unsigned char *name)
159{ 650{
160 inode->i_size = PAGE_SIZE; 651 struct sysfs_dirent *sd;
161 inode->i_fop = &sysfs_file_operations; 652
162 return 0; 653 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
654 if (sysfs_type(sd) && !strcmp(sd->s_name, name))
655 return sd;
656 return NULL;
163} 657}
164 658
165static int init_symlink(struct inode * inode) 659/**
660 * sysfs_get_dirent - find and get sysfs_dirent with the given name
661 * @parent_sd: sysfs_dirent to search under
662 * @name: name to look for
663 *
664 * Look for sysfs_dirent with name @name under @parent_sd and get
665 * it if found.
666 *
667 * LOCKING:
668 * Kernel thread context (may sleep). Grabs sysfs_mutex.
669 *
670 * RETURNS:
671 * Pointer to sysfs_dirent if found, NULL if not.
672 */
673struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
674 const unsigned char *name)
166{ 675{
167 inode->i_op = &sysfs_symlink_inode_operations; 676 struct sysfs_dirent *sd;
168 return 0; 677
678 mutex_lock(&sysfs_mutex);
679 sd = sysfs_find_dirent(parent_sd, name);
680 sysfs_get(sd);
681 mutex_unlock(&sysfs_mutex);
682
683 return sd;
169} 684}
170 685
171static int create_dir(struct kobject * k, struct dentry * p, 686static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
172 const char * n, struct dentry ** d) 687 const char *name, struct sysfs_dirent **p_sd)
173{ 688{
174 int error;
175 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 689 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
690 struct sysfs_addrm_cxt acxt;
691 struct sysfs_dirent *sd;
176 692
177 mutex_lock(&p->d_inode->i_mutex); 693 /* allocate */
178 *d = lookup_one_len(n, p, strlen(n)); 694 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
179 if (!IS_ERR(*d)) { 695 if (!sd)
180 if (sysfs_dirent_exist(p->d_fsdata, n)) 696 return -ENOMEM;
181 error = -EEXIST; 697 sd->s_elem.dir.kobj = kobj;
182 else
183 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
184 SYSFS_DIR);
185 if (!error) {
186 error = sysfs_create(*d, mode, init_dir);
187 if (!error) {
188 inc_nlink(p->d_inode);
189 (*d)->d_op = &sysfs_dentry_ops;
190 d_rehash(*d);
191 }
192 }
193 if (error && (error != -EEXIST)) {
194 struct sysfs_dirent *sd = (*d)->d_fsdata;
195 if (sd) {
196 list_del_init(&sd->s_sibling);
197 sysfs_put(sd);
198 }
199 d_drop(*d);
200 }
201 dput(*d);
202 } else
203 error = PTR_ERR(*d);
204 mutex_unlock(&p->d_inode->i_mutex);
205 return error;
206}
207 698
699 /* link in */
700 sysfs_addrm_start(&acxt, parent_sd);
701 if (!sysfs_find_dirent(parent_sd, name)) {
702 sysfs_add_one(&acxt, sd);
703 sysfs_link_sibling(sd);
704 }
705 if (sysfs_addrm_finish(&acxt)) {
706 *p_sd = sd;
707 return 0;
708 }
208 709
209int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) 710 sysfs_put(sd);
711 return -EEXIST;
712}
713
714int sysfs_create_subdir(struct kobject *kobj, const char *name,
715 struct sysfs_dirent **p_sd)
210{ 716{
211 return create_dir(k,k->dentry,n,d); 717 return create_dir(kobj, kobj->sd, name, p_sd);
212} 718}
213 719
214/** 720/**
215 * sysfs_create_dir - create a directory for an object. 721 * sysfs_create_dir - create a directory for an object.
216 * @kobj: object we're creating directory for. 722 * @kobj: object we're creating directory for.
217 * @shadow_parent: parent parent object. 723 * @shadow_parent_sd: parent sysfs_dirent, used instead of @kobj's parent when non-NULL.
218 */ 724 */
219 725int sysfs_create_dir(struct kobject *kobj,
220int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) 726 struct sysfs_dirent *shadow_parent_sd)
221{ 727{
222 struct dentry * dentry = NULL; 728 struct sysfs_dirent *parent_sd, *sd;
223 struct dentry * parent;
224 int error = 0; 729 int error = 0;
225 730
226 BUG_ON(!kobj); 731 BUG_ON(!kobj);
227 732
228 if (shadow_parent) 733 if (shadow_parent_sd)
229 parent = shadow_parent; 734 parent_sd = shadow_parent_sd;
230 else if (kobj->parent) 735 else if (kobj->parent)
231 parent = kobj->parent->dentry; 736 parent_sd = kobj->parent->sd;
232 else if (sysfs_mount && sysfs_mount->mnt_sb) 737 else if (sysfs_mount && sysfs_mount->mnt_sb)
233 parent = sysfs_mount->mnt_sb->s_root; 738 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
234 else 739 else
235 return -EFAULT; 740 return -EFAULT;
236 741
237 error = create_dir(kobj,parent,kobject_name(kobj),&dentry); 742 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
238 if (!error) 743 if (!error)
239 kobj->dentry = dentry; 744 kobj->sd = sd;
240 return error; 745 return error;
241} 746}
242 747
243/* attaches attribute's sysfs_dirent to the dentry corresponding to the 748static int sysfs_count_nlink(struct sysfs_dirent *sd)
244 * attribute file
245 */
246static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
247{ 749{
248 struct attribute * attr = NULL; 750 struct sysfs_dirent *child;
249 struct bin_attribute * bin_attr = NULL; 751 int nr = 0;
250 int (* init) (struct inode *) = NULL;
251 int error = 0;
252
253 if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
254 bin_attr = sd->s_element;
255 attr = &bin_attr->attr;
256 } else {
257 attr = sd->s_element;
258 init = init_file;
259 }
260 752
261 dentry->d_fsdata = sysfs_get(sd); 753 for (child = sd->s_children; child; child = child->s_sibling)
262 /* protect sd->s_dentry against sysfs_d_iput */ 754 if (sysfs_type(child) == SYSFS_DIR)
263 spin_lock(&sysfs_lock); 755 nr++;
264 sd->s_dentry = dentry; 756 return nr + 2;
265 spin_unlock(&sysfs_lock);
266 error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
267 if (error) {
268 sysfs_put(sd);
269 return error;
270 }
271
272 if (bin_attr) {
273 dentry->d_inode->i_size = bin_attr->size;
274 dentry->d_inode->i_fop = &bin_fops;
275 }
276 dentry->d_op = &sysfs_dentry_ops;
277 d_rehash(dentry);
278
279 return 0;
280}
281
282static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
283{
284 int err = 0;
285
286 dentry->d_fsdata = sysfs_get(sd);
287 /* protect sd->s_dentry against sysfs_d_iput */
288 spin_lock(&sysfs_lock);
289 sd->s_dentry = dentry;
290 spin_unlock(&sysfs_lock);
291 err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
292 if (!err) {
293 dentry->d_op = &sysfs_dentry_ops;
294 d_rehash(dentry);
295 } else
296 sysfs_put(sd);
297
298 return err;
299} 757}
300 758
301static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, 759static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -303,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
303{ 761{
304 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 762 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
305 struct sysfs_dirent * sd; 763 struct sysfs_dirent * sd;
306 int err = 0; 764 struct bin_attribute *bin_attr;
765 struct inode *inode;
766 int found = 0;
307 767
308 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 768 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
309 if (sd->s_type & SYSFS_NOT_PINNED) { 769 if (sysfs_type(sd) &&
310 const unsigned char * name = sysfs_get_name(sd); 770 !strcmp(sd->s_name, dentry->d_name.name)) {
771 found = 1;
772 break;
773 }
774 }
311 775
312 if (strcmp(name, dentry->d_name.name)) 776 /* no such entry */
313 continue; 777 if (!found)
778 return NULL;
314 779
315 if (sd->s_type & SYSFS_KOBJ_LINK) 780 /* attach dentry and inode */
316 err = sysfs_attach_link(sd, dentry); 781 inode = sysfs_get_inode(sd);
317 else 782 if (!inode)
318 err = sysfs_attach_attr(sd, dentry); 783 return ERR_PTR(-ENOMEM);
784
785 mutex_lock(&sysfs_mutex);
786
787 if (inode->i_state & I_NEW) {
788 /* initialize inode according to type */
789 switch (sysfs_type(sd)) {
790 case SYSFS_DIR:
791 inode->i_op = &sysfs_dir_inode_operations;
792 inode->i_fop = &sysfs_dir_operations;
793 inode->i_nlink = sysfs_count_nlink(sd);
794 break;
795 case SYSFS_KOBJ_ATTR:
796 inode->i_size = PAGE_SIZE;
797 inode->i_fop = &sysfs_file_operations;
798 break;
799 case SYSFS_KOBJ_BIN_ATTR:
800 bin_attr = sd->s_elem.bin_attr.bin_attr;
801 inode->i_size = bin_attr->size;
802 inode->i_fop = &bin_fops;
319 break; 803 break;
804 case SYSFS_KOBJ_LINK:
805 inode->i_op = &sysfs_symlink_inode_operations;
806 break;
807 default:
808 BUG();
320 } 809 }
321 } 810 }
322 811
323 return ERR_PTR(err); 812 sysfs_instantiate(dentry, inode);
813 sysfs_attach_dentry(sd, dentry);
814
815 mutex_unlock(&sysfs_mutex);
816
817 return NULL;
324} 818}
325 819
326const struct inode_operations sysfs_dir_inode_operations = { 820const struct inode_operations sysfs_dir_inode_operations = {
@@ -328,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
328 .setattr = sysfs_setattr, 822 .setattr = sysfs_setattr,
329}; 823};
330 824
331static void remove_dir(struct dentry * d) 825static void remove_dir(struct sysfs_dirent *sd)
332{ 826{
333 struct dentry * parent = dget(d->d_parent); 827 struct sysfs_addrm_cxt acxt;
334 struct sysfs_dirent * sd;
335
336 mutex_lock(&parent->d_inode->i_mutex);
337 d_delete(d);
338 sd = d->d_fsdata;
339 list_del_init(&sd->s_sibling);
340 sysfs_put(sd);
341 if (d->d_inode)
342 simple_rmdir(parent->d_inode,d);
343
344 pr_debug(" o %s removing done (%d)\n",d->d_name.name,
345 atomic_read(&d->d_count));
346 828
347 mutex_unlock(&parent->d_inode->i_mutex); 829 sysfs_addrm_start(&acxt, sd->s_parent);
348 dput(parent); 830 sysfs_unlink_sibling(sd);
831 sysfs_remove_one(&acxt, sd);
832 sysfs_addrm_finish(&acxt);
349} 833}
350 834
351void sysfs_remove_subdir(struct dentry * d) 835void sysfs_remove_subdir(struct sysfs_dirent *sd)
352{ 836{
353 remove_dir(d); 837 remove_dir(sd);
354} 838}
355 839
356 840
357static void __sysfs_remove_dir(struct dentry *dentry) 841static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
358{ 842{
359 struct sysfs_dirent * parent_sd; 843 struct sysfs_addrm_cxt acxt;
360 struct sysfs_dirent * sd, * tmp; 844 struct sysfs_dirent **pos;
361 845
362 dget(dentry); 846 if (!dir_sd)
363 if (!dentry)
364 return; 847 return;
365 848
366 pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); 849 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
367 mutex_lock(&dentry->d_inode->i_mutex); 850 sysfs_addrm_start(&acxt, dir_sd);
368 parent_sd = dentry->d_fsdata; 851 pos = &dir_sd->s_children;
369 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { 852 while (*pos) {
370 if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) 853 struct sysfs_dirent *sd = *pos;
371 continue; 854
372 list_del_init(&sd->s_sibling); 855 if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
373 sysfs_drop_dentry(sd, dentry); 856 *pos = sd->s_sibling;
374 sysfs_put(sd); 857 sd->s_sibling = NULL;
858 sysfs_remove_one(&acxt, sd);
859 } else
860 pos = &(*pos)->s_sibling;
375 } 861 }
376 mutex_unlock(&dentry->d_inode->i_mutex); 862 sysfs_addrm_finish(&acxt);
377 863
378 remove_dir(dentry); 864 remove_dir(dir_sd);
379 /**
380 * Drop reference from dget() on entrance.
381 */
382 dput(dentry);
383} 865}
384 866
385/** 867/**
@@ -393,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
393 875
394void sysfs_remove_dir(struct kobject * kobj) 876void sysfs_remove_dir(struct kobject * kobj)
395{ 877{
396 __sysfs_remove_dir(kobj->dentry); 878 struct sysfs_dirent *sd = kobj->sd;
397 kobj->dentry = NULL; 879
880 spin_lock(&sysfs_assoc_lock);
881 kobj->sd = NULL;
882 spin_unlock(&sysfs_assoc_lock);
883
884 __sysfs_remove_dir(sd);
398} 885}
399 886
400int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, 887int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
401 const char *new_name) 888 const char *new_name)
402{ 889{
403 int error = 0; 890 struct sysfs_dirent *sd = kobj->sd;
404 struct dentry * new_dentry; 891 struct dentry *new_parent = NULL;
892 struct dentry *old_dentry = NULL, *new_dentry = NULL;
893 const char *dup_name = NULL;
894 int error;
405 895
406 if (!new_parent) 896 /* get dentries */
407 return -EFAULT; 897 old_dentry = sysfs_get_dentry(sd);
898 if (IS_ERR(old_dentry)) {
899 error = PTR_ERR(old_dentry);
900 goto out_dput;
901 }
408 902
409 down_write(&sysfs_rename_sem); 903 new_parent = sysfs_get_dentry(new_parent_sd);
904 if (IS_ERR(new_parent)) {
905 error = PTR_ERR(new_parent);
906 goto out_dput;
907 }
908
909 /* lock new_parent and get dentry for new name */
410 mutex_lock(&new_parent->d_inode->i_mutex); 910 mutex_lock(&new_parent->d_inode->i_mutex);
411 911
412 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); 912 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
413 if (!IS_ERR(new_dentry)) { 913 if (IS_ERR(new_dentry)) {
414 /* By allowing two different directories with the 914 error = PTR_ERR(new_dentry);
415 * same d_parent we allow this routine to move 915 goto out_unlock;
416 * between different shadows of the same directory
417 */
418 if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
419 return -EINVAL;
420 else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
421 error = -EINVAL;
422 else if (new_dentry == kobj->dentry)
423 error = -EINVAL;
424 else if (!new_dentry->d_inode) {
425 error = kobject_set_name(kobj, "%s", new_name);
426 if (!error) {
427 struct sysfs_dirent *sd, *parent_sd;
428
429 d_add(new_dentry, NULL);
430 d_move(kobj->dentry, new_dentry);
431
432 sd = kobj->dentry->d_fsdata;
433 parent_sd = new_parent->d_fsdata;
434
435 list_del_init(&sd->s_sibling);
436 list_add(&sd->s_sibling, &parent_sd->s_children);
437 }
438 else
439 d_drop(new_dentry);
440 } else
441 error = -EEXIST;
442 dput(new_dentry);
443 } 916 }
444 mutex_unlock(&new_parent->d_inode->i_mutex);
445 up_write(&sysfs_rename_sem);
446 917
918 /* By allowing two different directories with the same
919 * d_parent we allow this routine to move between different
920 * shadows of the same directory
921 */
922 error = -EINVAL;
923 if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
924 new_dentry->d_parent->d_inode != new_parent->d_inode ||
925 old_dentry == new_dentry)
926 goto out_unlock;
927
928 error = -EEXIST;
929 if (new_dentry->d_inode)
930 goto out_unlock;
931
932 /* rename kobject and sysfs_dirent */
933 error = -ENOMEM;
934 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
935 if (!new_name)
936 goto out_drop;
937
938 error = kobject_set_name(kobj, "%s", new_name);
939 if (error)
940 goto out_drop;
941
942 dup_name = sd->s_name;
943 sd->s_name = new_name;
944
945 /* move under the new parent */
946 d_add(new_dentry, NULL);
947 d_move(sd->s_dentry, new_dentry);
948
949 mutex_lock(&sysfs_mutex);
950
951 sysfs_unlink_sibling(sd);
952 sysfs_get(new_parent_sd);
953 sysfs_put(sd->s_parent);
954 sd->s_parent = new_parent_sd;
955 sysfs_link_sibling(sd);
956
957 mutex_unlock(&sysfs_mutex);
958
959 error = 0;
960 goto out_unlock;
961
962 out_drop:
963 d_drop(new_dentry);
964 out_unlock:
965 mutex_unlock(&new_parent->d_inode->i_mutex);
966 out_dput:
967 kfree(dup_name);
968 dput(new_parent);
969 dput(old_dentry);
970 dput(new_dentry);
447 return error; 971 return error;
448} 972}
449 973
450int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) 974int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
451{ 975{
452 struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; 976 struct sysfs_dirent *sd = kobj->sd;
453 struct sysfs_dirent *new_parent_sd, *sd; 977 struct sysfs_dirent *new_parent_sd;
978 struct dentry *old_parent, *new_parent = NULL;
979 struct dentry *old_dentry = NULL, *new_dentry = NULL;
454 int error; 980 int error;
455 981
456 old_parent_dentry = kobj->parent ? 982 BUG_ON(!sd->s_parent);
457 kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; 983 new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
458 new_parent_dentry = new_parent ? 984
459 new_parent->dentry : sysfs_mount->mnt_sb->s_root; 985 /* get dentries */
986 old_dentry = sysfs_get_dentry(sd);
987 if (IS_ERR(old_dentry)) {
988 error = PTR_ERR(old_dentry);
989 goto out_dput;
990 }
991 old_parent = sd->s_parent->s_dentry;
992
993 new_parent = sysfs_get_dentry(new_parent_sd);
994 if (IS_ERR(new_parent)) {
995 error = PTR_ERR(new_parent);
996 goto out_dput;
997 }
460 998
461 if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) 999 if (old_parent->d_inode == new_parent->d_inode) {
462 return 0; /* nothing to move */ 1000 error = 0;
1001 goto out_dput; /* nothing to move */
1002 }
463again: 1003again:
464 mutex_lock(&old_parent_dentry->d_inode->i_mutex); 1004 mutex_lock(&old_parent->d_inode->i_mutex);
465 if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { 1005 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
466 mutex_unlock(&old_parent_dentry->d_inode->i_mutex); 1006 mutex_unlock(&old_parent->d_inode->i_mutex);
467 goto again; 1007 goto again;
468 } 1008 }
469 1009
470 new_parent_sd = new_parent_dentry->d_fsdata; 1010 new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
471 sd = kobj->dentry->d_fsdata;
472
473 new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
474 strlen(kobj->name));
475 if (IS_ERR(new_dentry)) { 1011 if (IS_ERR(new_dentry)) {
476 error = PTR_ERR(new_dentry); 1012 error = PTR_ERR(new_dentry);
477 goto out; 1013 goto out_unlock;
478 } else 1014 } else
479 error = 0; 1015 error = 0;
480 d_add(new_dentry, NULL); 1016 d_add(new_dentry, NULL);
481 d_move(kobj->dentry, new_dentry); 1017 d_move(sd->s_dentry, new_dentry);
482 dput(new_dentry); 1018 dput(new_dentry);
483 1019
484 /* Remove from old parent's list and insert into new parent's list. */ 1020 /* Remove from old parent's list and insert into new parent's list. */
485 list_del_init(&sd->s_sibling); 1021 mutex_lock(&sysfs_mutex);
486 list_add(&sd->s_sibling, &new_parent_sd->s_children); 1022
1023 sysfs_unlink_sibling(sd);
1024 sysfs_get(new_parent_sd);
1025 sysfs_put(sd->s_parent);
1026 sd->s_parent = new_parent_sd;
1027 sysfs_link_sibling(sd);
487 1028
488out: 1029 mutex_unlock(&sysfs_mutex);
489 mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
490 mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
491 1030
1031 out_unlock:
1032 mutex_unlock(&new_parent->d_inode->i_mutex);
1033 mutex_unlock(&old_parent->d_inode->i_mutex);
1034 out_dput:
1035 dput(new_parent);
1036 dput(old_dentry);
1037 dput(new_dentry);
492 return error; 1038 return error;
493} 1039}
494 1040
@@ -496,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
496{ 1042{
497 struct dentry * dentry = file->f_path.dentry; 1043 struct dentry * dentry = file->f_path.dentry;
498 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1044 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1045 struct sysfs_dirent * sd;
499 1046
500 mutex_lock(&dentry->d_inode->i_mutex); 1047 sd = sysfs_new_dirent("_DIR_", 0, 0);
501 file->private_data = sysfs_new_dirent(parent_sd, NULL); 1048 if (sd) {
502 mutex_unlock(&dentry->d_inode->i_mutex); 1049 mutex_lock(&sysfs_mutex);
503 1050 sd->s_parent = sysfs_get(parent_sd);
504 return file->private_data ? 0 : -ENOMEM; 1051 sysfs_link_sibling(sd);
1052 mutex_unlock(&sysfs_mutex);
1053 }
505 1054
1055 file->private_data = sd;
1056 return sd ? 0 : -ENOMEM;
506} 1057}
507 1058
508static int sysfs_dir_close(struct inode *inode, struct file *file) 1059static int sysfs_dir_close(struct inode *inode, struct file *file)
509{ 1060{
510 struct dentry * dentry = file->f_path.dentry;
511 struct sysfs_dirent * cursor = file->private_data; 1061 struct sysfs_dirent * cursor = file->private_data;
512 1062
513 mutex_lock(&dentry->d_inode->i_mutex); 1063 mutex_lock(&sysfs_mutex);
514 list_del_init(&cursor->s_sibling); 1064 sysfs_unlink_sibling(cursor);
515 mutex_unlock(&dentry->d_inode->i_mutex); 1065 mutex_unlock(&sysfs_mutex);
516 1066
517 release_sysfs_dirent(cursor); 1067 release_sysfs_dirent(cursor);
518 1068
@@ -530,7 +1080,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
530 struct dentry *dentry = filp->f_path.dentry; 1080 struct dentry *dentry = filp->f_path.dentry;
531 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1081 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
532 struct sysfs_dirent *cursor = filp->private_data; 1082 struct sysfs_dirent *cursor = filp->private_data;
533 struct list_head *p, *q = &cursor->s_sibling; 1083 struct sysfs_dirent **pos;
534 ino_t ino; 1084 ino_t ino;
535 int i = filp->f_pos; 1085 int i = filp->f_pos;
536 1086
@@ -543,38 +1093,52 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
543 i++; 1093 i++;
544 /* fallthrough */ 1094 /* fallthrough */
545 case 1: 1095 case 1:
546 ino = parent_ino(dentry); 1096 if (parent_sd->s_parent)
1097 ino = parent_sd->s_parent->s_ino;
1098 else
1099 ino = parent_sd->s_ino;
547 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1100 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
548 break; 1101 break;
549 filp->f_pos++; 1102 filp->f_pos++;
550 i++; 1103 i++;
551 /* fallthrough */ 1104 /* fallthrough */
552 default: 1105 default:
1106 mutex_lock(&sysfs_mutex);
1107
1108 pos = &parent_sd->s_children;
1109 while (*pos != cursor)
1110 pos = &(*pos)->s_sibling;
1111
1112 /* unlink cursor */
1113 *pos = cursor->s_sibling;
1114
553 if (filp->f_pos == 2) 1115 if (filp->f_pos == 2)
554 list_move(q, &parent_sd->s_children); 1116 pos = &parent_sd->s_children;
555 1117
556 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1118 for ( ; *pos; pos = &(*pos)->s_sibling) {
557 struct sysfs_dirent *next; 1119 struct sysfs_dirent *next = *pos;
558 const char * name; 1120 const char * name;
559 int len; 1121 int len;
560 1122
561 next = list_entry(p, struct sysfs_dirent, 1123 if (!sysfs_type(next))
562 s_sibling);
563 if (!next->s_element)
564 continue; 1124 continue;
565 1125
566 name = sysfs_get_name(next); 1126 name = next->s_name;
567 len = strlen(name); 1127 len = strlen(name);
568 ino = next->s_ino; 1128 ino = next->s_ino;
569 1129
570 if (filldir(dirent, name, len, filp->f_pos, ino, 1130 if (filldir(dirent, name, len, filp->f_pos, ino,
571 dt_type(next)) < 0) 1131 dt_type(next)) < 0)
572 return 0; 1132 break;
573 1133
574 list_move(q, p);
575 p = q;
576 filp->f_pos++; 1134 filp->f_pos++;
577 } 1135 }
1136
1137 /* put cursor back in */
1138 cursor->s_sibling = *pos;
1139 *pos = cursor;
1140
1141 mutex_unlock(&sysfs_mutex);
578 } 1142 }
579 return 0; 1143 return 0;
580} 1144}
@@ -583,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
583{ 1147{
584 struct dentry * dentry = file->f_path.dentry; 1148 struct dentry * dentry = file->f_path.dentry;
585 1149
586 mutex_lock(&dentry->d_inode->i_mutex);
587 switch (origin) { 1150 switch (origin) {
588 case 1: 1151 case 1:
589 offset += file->f_pos; 1152 offset += file->f_pos;
@@ -591,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
591 if (offset >= 0) 1154 if (offset >= 0)
592 break; 1155 break;
593 default: 1156 default:
594 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
595 return -EINVAL; 1157 return -EINVAL;
596 } 1158 }
597 if (offset != file->f_pos) { 1159 if (offset != file->f_pos) {
1160 mutex_lock(&sysfs_mutex);
1161
598 file->f_pos = offset; 1162 file->f_pos = offset;
599 if (file->f_pos >= 2) { 1163 if (file->f_pos >= 2) {
600 struct sysfs_dirent *sd = dentry->d_fsdata; 1164 struct sysfs_dirent *sd = dentry->d_fsdata;
601 struct sysfs_dirent *cursor = file->private_data; 1165 struct sysfs_dirent *cursor = file->private_data;
602 struct list_head *p; 1166 struct sysfs_dirent **pos;
603 loff_t n = file->f_pos - 2; 1167 loff_t n = file->f_pos - 2;
604 1168
605 list_del(&cursor->s_sibling); 1169 sysfs_unlink_sibling(cursor);
606 p = sd->s_children.next; 1170
607 while (n && p != &sd->s_children) { 1171 pos = &sd->s_children;
608 struct sysfs_dirent *next; 1172 while (n && *pos) {
609 next = list_entry(p, struct sysfs_dirent, 1173 struct sysfs_dirent *next = *pos;
610 s_sibling); 1174 if (sysfs_type(next))
611 if (next->s_element)
612 n--; 1175 n--;
613 p = p->next; 1176 pos = &(*pos)->s_sibling;
614 } 1177 }
615 list_add_tail(&cursor->s_sibling, p); 1178
1179 cursor->s_sibling = *pos;
1180 *pos = cursor;
616 } 1181 }
1182
1183 mutex_unlock(&sysfs_mutex);
617 } 1184 }
618 mutex_unlock(&dentry->d_inode->i_mutex); 1185
619 return offset; 1186 return offset;
620} 1187}
621 1188
@@ -628,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
628int sysfs_make_shadowed_dir(struct kobject *kobj, 1195int sysfs_make_shadowed_dir(struct kobject *kobj,
629 void * (*follow_link)(struct dentry *, struct nameidata *)) 1196 void * (*follow_link)(struct dentry *, struct nameidata *))
630{ 1197{
1198 struct dentry *dentry;
631 struct inode *inode; 1199 struct inode *inode;
632 struct inode_operations *i_op; 1200 struct inode_operations *i_op;
633 1201
634 inode = kobj->dentry->d_inode; 1202 /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
635 if (inode->i_op != &sysfs_dir_inode_operations) 1203 dentry = sysfs_get_dentry(kobj->sd);
1204 if (IS_ERR(dentry))
1205 return PTR_ERR(dentry);
1206
1207 inode = dentry->d_inode;
1208 if (inode->i_op != &sysfs_dir_inode_operations) {
1209 dput(dentry);
636 return -EINVAL; 1210 return -EINVAL;
1211 }
637 1212
638 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); 1213 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
639 if (!i_op) 1214 if (!i_op)
@@ -658,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
658 * directory. 1233 * directory.
659 */ 1234 */
660 1235
661struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) 1236struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
662{ 1237{
663 struct sysfs_dirent *sd; 1238 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
664 struct dentry *parent, *dir, *shadow; 1239 struct dentry *dir, *parent, *shadow;
665 struct inode *inode; 1240 struct inode *inode;
1241 struct sysfs_dirent *sd;
1242 struct sysfs_addrm_cxt acxt;
666 1243
667 dir = kobj->dentry; 1244 dir = sysfs_get_dentry(kobj->sd);
668 inode = dir->d_inode; 1245 if (IS_ERR(dir)) {
1246 sd = (void *)dir;
1247 goto out;
1248 }
669 parent = dir->d_parent; 1249 parent = dir->d_parent;
670 shadow = ERR_PTR(-EINVAL); 1250
1251 inode = dir->d_inode;
1252 sd = ERR_PTR(-EINVAL);
671 if (!sysfs_is_shadowed_inode(inode)) 1253 if (!sysfs_is_shadowed_inode(inode))
672 goto out; 1254 goto out_dput;
673 1255
674 shadow = d_alloc(parent, &dir->d_name); 1256 shadow = d_alloc(parent, &dir->d_name);
675 if (!shadow) 1257 if (!shadow)
676 goto nomem; 1258 goto nomem;
677 1259
678 sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); 1260 sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
679 if (!sd) 1261 if (!sd)
680 goto nomem; 1262 goto nomem;
1263 sd->s_elem.dir.kobj = kobj;
681 1264
1265 sysfs_addrm_start(&acxt, parent_sd);
1266
1267 /* add but don't link into children list */
1268 sysfs_add_one(&acxt, sd);
1269
1270 /* attach and instantiate dentry */
1271 sysfs_attach_dentry(sd, shadow);
682 d_instantiate(shadow, igrab(inode)); 1272 d_instantiate(shadow, igrab(inode));
683 inc_nlink(inode); 1273 inc_nlink(inode); /* tj: synchronization? */
684 inc_nlink(parent->d_inode); 1274
685 shadow->d_op = &sysfs_dentry_ops; 1275 sysfs_addrm_finish(&acxt);
686 1276
687 dget(shadow); /* Extra count - pin the dentry in core */ 1277 dget(shadow); /* Extra count - pin the dentry in core */
688 1278
689out: 1279 goto out_dput;
690 return shadow; 1280
691nomem: 1281 nomem:
692 dput(shadow); 1282 dput(shadow);
693 shadow = ERR_PTR(-ENOMEM); 1283 sd = ERR_PTR(-ENOMEM);
694 goto out; 1284 out_dput:
1285 dput(dir);
1286 out:
1287 return sd;
695} 1288}
696 1289
697/** 1290/**
698 * sysfs_remove_shadow_dir - remove an object's directory. 1291 * sysfs_remove_shadow_dir - remove an object's directory.
699 * @shadow: dentry of shadow directory 1292 * @shadow_sd: sysfs_dirent of shadow directory
700 * 1293 *
701 * The only thing special about this is that we remove any files in 1294 * The only thing special about this is that we remove any files in
702 * the directory before we remove the directory, and we've inlined 1295 * the directory before we remove the directory, and we've inlined
703 * what used to be sysfs_rmdir() below, instead of calling separately. 1296 * what used to be sysfs_rmdir() below, instead of calling separately.
704 */ 1297 */
705 1298
706void sysfs_remove_shadow_dir(struct dentry *shadow) 1299void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
707{ 1300{
708 __sysfs_remove_dir(shadow); 1301 __sysfs_remove_dir(shadow_sd);
709} 1302}
710 1303
711const struct file_operations sysfs_dir_operations = { 1304const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec0..cc497994b2a8 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
50 .store = subsys_attr_store, 50 .store = subsys_attr_store,
51}; 51};
52 52
53/** 53struct sysfs_buffer {
54 * add_to_collection - add buffer to a collection 54 size_t count;
55 * @buffer: buffer to be added 55 loff_t pos;
56 * @node: inode of set to add to 56 char * page;
57 */ 57 struct sysfs_ops * ops;
58 58 struct semaphore sem;
59static inline void 59 int needs_read_fill;
60add_to_collection(struct sysfs_buffer *buffer, struct inode *node) 60 int event;
61{ 61};
62 struct sysfs_buffer_collection *set = node->i_private;
63
64 mutex_lock(&node->i_mutex);
65 list_add(&buffer->associates, &set->associates);
66 mutex_unlock(&node->i_mutex);
67}
68
69static inline void
70remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
71{
72 mutex_lock(&node->i_mutex);
73 list_del(&buffer->associates);
74 mutex_unlock(&node->i_mutex);
75}
76 62
77/** 63/**
78 * fill_read_buffer - allocate and fill buffer from object. 64 * fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
87 */ 73 */
88static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 74static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
89{ 75{
90 struct sysfs_dirent * sd = dentry->d_fsdata; 76 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
91 struct attribute * attr = to_attr(dentry); 77 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 struct kobject * kobj = to_kobj(dentry->d_parent);
93 struct sysfs_ops * ops = buffer->ops; 78 struct sysfs_ops * ops = buffer->ops;
94 int ret = 0; 79 int ret = 0;
95 ssize_t count; 80 ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
99 if (!buffer->page) 84 if (!buffer->page)
100 return -ENOMEM; 85 return -ENOMEM;
101 86
102 buffer->event = atomic_read(&sd->s_event); 87 /* need attr_sd for attr and ops, its parent for kobj */
103 count = ops->show(kobj,attr,buffer->page); 88 if (!sysfs_get_active_two(attr_sd))
89 return -ENODEV;
90
91 buffer->event = atomic_read(&attr_sd->s_event);
92 count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
93
94 sysfs_put_active_two(attr_sd);
95
104 BUG_ON(count > (ssize_t)PAGE_SIZE); 96 BUG_ON(count > (ssize_t)PAGE_SIZE);
105 if (count >= 0) { 97 if (count >= 0) {
106 buffer->needs_read_fill = 0; 98 buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
138 130
139 down(&buffer->sem); 131 down(&buffer->sem);
140 if (buffer->needs_read_fill) { 132 if (buffer->needs_read_fill) {
141 if (buffer->orphaned) 133 retval = fill_read_buffer(file->f_path.dentry,buffer);
142 retval = -ENODEV;
143 else
144 retval = fill_read_buffer(file->f_path.dentry,buffer);
145 if (retval) 134 if (retval)
146 goto out; 135 goto out;
147 } 136 }
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
196 * passing the buffer that we acquired in fill_write_buffer(). 185 * passing the buffer that we acquired in fill_write_buffer().
197 */ 186 */
198 187
199static int 188static int
200flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) 189flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
201{ 190{
202 struct attribute * attr = to_attr(dentry); 191 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
203 struct kobject * kobj = to_kobj(dentry->d_parent); 192 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
204 struct sysfs_ops * ops = buffer->ops; 193 struct sysfs_ops * ops = buffer->ops;
194 int rc;
195
196 /* need attr_sd for attr and ops, its parent for kobj */
197 if (!sysfs_get_active_two(attr_sd))
198 return -ENODEV;
199
200 rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
205 201
206 return ops->store(kobj,attr,buffer->page,count); 202 sysfs_put_active_two(attr_sd);
203
204 return rc;
207} 205}
208 206
209 207
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
231 ssize_t len; 229 ssize_t len;
232 230
233 down(&buffer->sem); 231 down(&buffer->sem);
234 if (buffer->orphaned) {
235 len = -ENODEV;
236 goto out;
237 }
238 len = fill_write_buffer(buffer, buf, count); 232 len = fill_write_buffer(buffer, buf, count);
239 if (len > 0) 233 if (len > 0)
240 len = flush_write_buffer(file->f_path.dentry, buffer, len); 234 len = flush_write_buffer(file->f_path.dentry, buffer, len);
241 if (len > 0) 235 if (len > 0)
242 *ppos += len; 236 *ppos += len;
243out:
244 up(&buffer->sem); 237 up(&buffer->sem);
245 return len; 238 return len;
246} 239}
247 240
248static int sysfs_open_file(struct inode *inode, struct file *file) 241static int sysfs_open_file(struct inode *inode, struct file *file)
249{ 242{
250 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 243 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
251 struct attribute * attr = to_attr(file->f_path.dentry); 244 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
252 struct sysfs_buffer_collection *set;
253 struct sysfs_buffer * buffer; 245 struct sysfs_buffer * buffer;
254 struct sysfs_ops * ops = NULL; 246 struct sysfs_ops * ops = NULL;
255 int error = 0; 247 int error;
256
257 if (!kobj || !attr)
258 goto Einval;
259 248
260 /* Grab the module reference for this attribute if we have one */ 249 /* need attr_sd for attr and ops, its parent for kobj */
261 if (!try_module_get(attr->owner)) { 250 if (!sysfs_get_active_two(attr_sd))
262 error = -ENODEV; 251 return -ENODEV;
263 goto Done;
264 }
265 252
266 /* if the kobject has no ktype, then we assume that it is a subsystem 253 /* if the kobject has no ktype, then we assume that it is a subsystem
267 * itself, and use ops for it. 254 * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
273 else 260 else
274 ops = &subsys_sysfs_ops; 261 ops = &subsys_sysfs_ops;
275 262
263 error = -EACCES;
264
276 /* No sysfs operations, either from having no subsystem, 265 /* No sysfs operations, either from having no subsystem,
277 * or the subsystem have no operations. 266 * or the subsystem have no operations.
278 */ 267 */
279 if (!ops) 268 if (!ops)
280 goto Eaccess; 269 goto err_out;
281
282 /* make sure we have a collection to add our buffers to */
283 mutex_lock(&inode->i_mutex);
284 if (!(set = inode->i_private)) {
285 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
286 error = -ENOMEM;
287 goto Done;
288 } else {
289 INIT_LIST_HEAD(&set->associates);
290 }
291 }
292 mutex_unlock(&inode->i_mutex);
293 270
294 /* File needs write support. 271 /* File needs write support.
295 * The inode's perms must say it's ok, 272 * The inode's perms must say it's ok,
296 * and we must have a store method. 273 * and we must have a store method.
297 */ 274 */
298 if (file->f_mode & FMODE_WRITE) { 275 if (file->f_mode & FMODE_WRITE) {
299
300 if (!(inode->i_mode & S_IWUGO) || !ops->store) 276 if (!(inode->i_mode & S_IWUGO) || !ops->store)
301 goto Eaccess; 277 goto err_out;
302
303 } 278 }
304 279
305 /* File needs read support. 280 /* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
308 */ 283 */
309 if (file->f_mode & FMODE_READ) { 284 if (file->f_mode & FMODE_READ) {
310 if (!(inode->i_mode & S_IRUGO) || !ops->show) 285 if (!(inode->i_mode & S_IRUGO) || !ops->show)
311 goto Eaccess; 286 goto err_out;
312 } 287 }
313 288
314 /* No error? Great, allocate a buffer for the file, and store it 289 /* No error? Great, allocate a buffer for the file, and store it
315 * it in file->private_data for easy access. 290 * it in file->private_data for easy access.
316 */ 291 */
292 error = -ENOMEM;
317 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); 293 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
318 if (buffer) { 294 if (!buffer)
319 INIT_LIST_HEAD(&buffer->associates); 295 goto err_out;
320 init_MUTEX(&buffer->sem); 296
321 buffer->needs_read_fill = 1; 297 init_MUTEX(&buffer->sem);
322 buffer->ops = ops; 298 buffer->needs_read_fill = 1;
323 add_to_collection(buffer, inode); 299 buffer->ops = ops;
324 file->private_data = buffer; 300 file->private_data = buffer;
325 } else 301
326 error = -ENOMEM; 302 /* open succeeded, put active references and pin attr_sd */
327 goto Done; 303 sysfs_put_active_two(attr_sd);
328 304 sysfs_get(attr_sd);
329 Einval: 305 return 0;
330 error = -EINVAL; 306
331 goto Done; 307 err_out:
332 Eaccess: 308 sysfs_put_active_two(attr_sd);
333 error = -EACCES;
334 module_put(attr->owner);
335 Done:
336 if (error)
337 kobject_put(kobj);
338 return error; 309 return error;
339} 310}
340 311
341static int sysfs_release(struct inode * inode, struct file * filp) 312static int sysfs_release(struct inode * inode, struct file * filp)
342{ 313{
343 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 314 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
344 struct attribute * attr = to_attr(filp->f_path.dentry); 315 struct sysfs_buffer *buffer = filp->private_data;
345 struct module * owner = attr->owner;
346 struct sysfs_buffer * buffer = filp->private_data;
347 316
348 if (buffer) 317 sysfs_put(attr_sd);
349 remove_from_collection(buffer, inode);
350 kobject_put(kobj);
351 /* After this point, attr should not be accessed. */
352 module_put(owner);
353 318
354 if (buffer) { 319 if (buffer) {
355 if (buffer->page) 320 if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
376static unsigned int sysfs_poll(struct file *filp, poll_table *wait) 341static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
377{ 342{
378 struct sysfs_buffer * buffer = filp->private_data; 343 struct sysfs_buffer * buffer = filp->private_data;
379 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 344 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
380 struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; 345 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
381 int res = 0; 346
347 /* need parent for the kobj, grab both */
348 if (!sysfs_get_active_two(attr_sd))
349 goto trigger;
382 350
383 poll_wait(filp, &kobj->poll, wait); 351 poll_wait(filp, &kobj->poll, wait);
384 352
385 if (buffer->event != atomic_read(&sd->s_event)) { 353 sysfs_put_active_two(attr_sd);
386 res = POLLERR|POLLPRI;
387 buffer->needs_read_fill = 1;
388 }
389 354
390 return res; 355 if (buffer->event != atomic_read(&attr_sd->s_event))
391} 356 goto trigger;
392 357
358 return 0;
393 359
394static struct dentry *step_down(struct dentry *dir, const char * name) 360 trigger:
395{ 361 buffer->needs_read_fill = 1;
396 struct dentry * de; 362 return POLLERR|POLLPRI;
397
398 if (dir == NULL || dir->d_inode == NULL)
399 return NULL;
400
401 mutex_lock(&dir->d_inode->i_mutex);
402 de = lookup_one_len(name, dir, strlen(name));
403 mutex_unlock(&dir->d_inode->i_mutex);
404 dput(dir);
405 if (IS_ERR(de))
406 return NULL;
407 if (de->d_inode == NULL) {
408 dput(de);
409 return NULL;
410 }
411 return de;
412} 363}
413 364
414void sysfs_notify(struct kobject * k, char *dir, char *attr) 365void sysfs_notify(struct kobject *k, char *dir, char *attr)
415{ 366{
416 struct dentry *de = k->dentry; 367 struct sysfs_dirent *sd = k->sd;
417 if (de) 368
418 dget(de); 369 mutex_lock(&sysfs_mutex);
419 if (de && dir) 370
420 de = step_down(de, dir); 371 if (sd && dir)
421 if (de && attr) 372 sd = sysfs_find_dirent(sd, dir);
422 de = step_down(de, attr); 373 if (sd && attr)
423 if (de) { 374 sd = sysfs_find_dirent(sd, attr);
424 struct sysfs_dirent * sd = de->d_fsdata; 375 if (sd) {
425 if (sd) 376 atomic_inc(&sd->s_event);
426 atomic_inc(&sd->s_event);
427 wake_up_interruptible(&k->poll); 377 wake_up_interruptible(&k->poll);
428 dput(de);
429 } 378 }
379
380 mutex_unlock(&sysfs_mutex);
430} 381}
431EXPORT_SYMBOL_GPL(sysfs_notify); 382EXPORT_SYMBOL_GPL(sysfs_notify);
432 383
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
440}; 391};
441 392
442 393
443int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) 394int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
395 int type)
444{ 396{
445 struct sysfs_dirent * parent_sd = dir->d_fsdata;
446 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 397 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
447 int error = -EEXIST; 398 struct sysfs_addrm_cxt acxt;
399 struct sysfs_dirent *sd;
448 400
449 mutex_lock(&dir->d_inode->i_mutex); 401 sd = sysfs_new_dirent(attr->name, mode, type);
450 if (!sysfs_dirent_exist(parent_sd, attr->name)) 402 if (!sd)
451 error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, 403 return -ENOMEM;
452 mode, type); 404 sd->s_elem.attr.attr = (void *)attr;
453 mutex_unlock(&dir->d_inode->i_mutex);
454 405
455 return error; 406 sysfs_addrm_start(&acxt, dir_sd);
407
408 if (!sysfs_find_dirent(dir_sd, attr->name)) {
409 sysfs_add_one(&acxt, sd);
410 sysfs_link_sibling(sd);
411 }
412
413 if (sysfs_addrm_finish(&acxt))
414 return 0;
415
416 sysfs_put(sd);
417 return -EEXIST;
456} 418}
457 419
458 420
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
464 426
465int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) 427int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
466{ 428{
467 BUG_ON(!kobj || !kobj->dentry || !attr); 429 BUG_ON(!kobj || !kobj->sd || !attr);
468 430
469 return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); 431 return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
470 432
471} 433}
472 434
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
480int sysfs_add_file_to_group(struct kobject *kobj, 442int sysfs_add_file_to_group(struct kobject *kobj,
481 const struct attribute *attr, const char *group) 443 const struct attribute *attr, const char *group)
482{ 444{
483 struct dentry *dir; 445 struct sysfs_dirent *dir_sd;
484 int error; 446 int error;
485 447
486 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 448 dir_sd = sysfs_get_dirent(kobj->sd, group);
487 if (IS_ERR(dir)) 449 if (!dir_sd)
488 error = PTR_ERR(dir); 450 return -ENOENT;
489 else { 451
490 error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); 452 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
491 dput(dir); 453 sysfs_put(dir_sd);
492 } 454
493 return error; 455 return error;
494} 456}
495EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); 457EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
502 */ 464 */
503int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) 465int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
504{ 466{
505 struct dentry * dir = kobj->dentry; 467 struct sysfs_dirent *victim_sd = NULL;
506 struct dentry * victim; 468 struct dentry *victim = NULL;
507 int res = -ENOENT; 469 int rc;
508 470
509 mutex_lock(&dir->d_inode->i_mutex); 471 rc = -ENOENT;
510 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 472 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
511 if (!IS_ERR(victim)) { 473 if (!victim_sd)
512 /* make sure dentry is really there */ 474 goto out;
513 if (victim->d_inode && 475
514 (victim->d_parent->d_inode == dir->d_inode)) { 476 victim = sysfs_get_dentry(victim_sd);
515 victim->d_inode->i_mtime = CURRENT_TIME; 477 if (IS_ERR(victim)) {
516 fsnotify_modify(victim); 478 rc = PTR_ERR(victim);
517 res = 0; 479 victim = NULL;
518 } else 480 goto out;
519 d_drop(victim);
520
521 /**
522 * Drop the reference acquired from lookup_one_len() above.
523 */
524 dput(victim);
525 } 481 }
526 mutex_unlock(&dir->d_inode->i_mutex);
527 482
528 return res; 483 mutex_lock(&victim->d_inode->i_mutex);
484 victim->d_inode->i_mtime = CURRENT_TIME;
485 fsnotify_modify(victim);
486 mutex_unlock(&victim->d_inode->i_mutex);
487 rc = 0;
488 out:
489 dput(victim);
490 sysfs_put(victim_sd);
491 return rc;
529} 492}
530 493
531 494
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
538 */ 501 */
539int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 502int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
540{ 503{
541 struct dentry *dir = kobj->dentry; 504 struct sysfs_dirent *victim_sd = NULL;
542 struct dentry *victim; 505 struct dentry *victim = NULL;
543 struct inode * inode; 506 struct inode * inode;
544 struct iattr newattrs; 507 struct iattr newattrs;
545 int res = -ENOENT; 508 int rc;
546 509
547 mutex_lock(&dir->d_inode->i_mutex); 510 rc = -ENOENT;
548 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 511 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
549 if (!IS_ERR(victim)) { 512 if (!victim_sd)
550 if (victim->d_inode && 513 goto out;
551 (victim->d_parent->d_inode == dir->d_inode)) { 514
552 inode = victim->d_inode; 515 victim = sysfs_get_dentry(victim_sd);
553 mutex_lock(&inode->i_mutex); 516 if (IS_ERR(victim)) {
554 newattrs.ia_mode = (mode & S_IALLUGO) | 517 rc = PTR_ERR(victim);
555 (inode->i_mode & ~S_IALLUGO); 518 victim = NULL;
556 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 519 goto out;
557 res = notify_change(victim, &newattrs);
558 mutex_unlock(&inode->i_mutex);
559 }
560 dput(victim);
561 } 520 }
562 mutex_unlock(&dir->d_inode->i_mutex);
563 521
564 return res; 522 inode = victim->d_inode;
523 mutex_lock(&inode->i_mutex);
524 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
525 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
526 rc = notify_change(victim, &newattrs);
527 mutex_unlock(&inode->i_mutex);
528 out:
529 dput(victim);
530 sysfs_put(victim_sd);
531 return rc;
565} 532}
566EXPORT_SYMBOL_GPL(sysfs_chmod_file); 533EXPORT_SYMBOL_GPL(sysfs_chmod_file);
567 534
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
576 543
577void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 544void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
578{ 545{
579 sysfs_hash_and_remove(kobj->dentry, attr->name); 546 sysfs_hash_and_remove(kobj->sd, attr->name);
580} 547}
581 548
582 549
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
589void sysfs_remove_file_from_group(struct kobject *kobj, 556void sysfs_remove_file_from_group(struct kobject *kobj,
590 const struct attribute *attr, const char *group) 557 const struct attribute *attr, const char *group)
591{ 558{
592 struct dentry *dir; 559 struct sysfs_dirent *dir_sd;
593 560
594 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 561 dir_sd = sysfs_get_dirent(kobj->sd, group);
595 if (!IS_ERR(dir)) { 562 if (dir_sd) {
596 sysfs_hash_and_remove(dir, attr->name); 563 sysfs_hash_and_remove(dir_sd, attr->name);
597 dput(dir); 564 sysfs_put(dir_sd);
598 } 565 }
599} 566}
600EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 567EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5ef..f318b73c790c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
18#include "sysfs.h" 18#include "sysfs.h"
19 19
20 20
21static void remove_files(struct dentry * dir, 21static void remove_files(struct sysfs_dirent *dir_sd,
22 const struct attribute_group * grp) 22 const struct attribute_group *grp)
23{ 23{
24 struct attribute *const* attr; 24 struct attribute *const* attr;
25 25
26 for (attr = grp->attrs; *attr; attr++) 26 for (attr = grp->attrs; *attr; attr++)
27 sysfs_hash_and_remove(dir,(*attr)->name); 27 sysfs_hash_and_remove(dir_sd, (*attr)->name);
28} 28}
29 29
30static int create_files(struct dentry * dir, 30static int create_files(struct sysfs_dirent *dir_sd,
31 const struct attribute_group * grp) 31 const struct attribute_group *grp)
32{ 32{
33 struct attribute *const* attr; 33 struct attribute *const* attr;
34 int error = 0; 34 int error = 0;
35 35
36 for (attr = grp->attrs; *attr && !error; attr++) { 36 for (attr = grp->attrs; *attr && !error; attr++)
37 error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); 37 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
38 }
39 if (error) 38 if (error)
40 remove_files(dir,grp); 39 remove_files(dir_sd, grp);
41 return error; 40 return error;
42} 41}
43 42
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
45int sysfs_create_group(struct kobject * kobj, 44int sysfs_create_group(struct kobject * kobj,
46 const struct attribute_group * grp) 45 const struct attribute_group * grp)
47{ 46{
48 struct dentry * dir; 47 struct sysfs_dirent *sd;
49 int error; 48 int error;
50 49
51 BUG_ON(!kobj || !kobj->dentry); 50 BUG_ON(!kobj || !kobj->sd);
52 51
53 if (grp->name) { 52 if (grp->name) {
54 error = sysfs_create_subdir(kobj,grp->name,&dir); 53 error = sysfs_create_subdir(kobj, grp->name, &sd);
55 if (error) 54 if (error)
56 return error; 55 return error;
57 } else 56 } else
58 dir = kobj->dentry; 57 sd = kobj->sd;
59 dir = dget(dir); 58 sysfs_get(sd);
60 if ((error = create_files(dir,grp))) { 59 error = create_files(sd, grp);
60 if (error) {
61 if (grp->name) 61 if (grp->name)
62 sysfs_remove_subdir(dir); 62 sysfs_remove_subdir(sd);
63 } 63 }
64 dput(dir); 64 sysfs_put(sd);
65 return error; 65 return error;
66} 66}
67 67
68void sysfs_remove_group(struct kobject * kobj, 68void sysfs_remove_group(struct kobject * kobj,
69 const struct attribute_group * grp) 69 const struct attribute_group * grp)
70{ 70{
71 struct dentry * dir; 71 struct sysfs_dirent *dir_sd = kobj->sd;
72 struct sysfs_dirent *sd;
72 73
73 if (grp->name) { 74 if (grp->name) {
74 dir = lookup_one_len_kern(grp->name, kobj->dentry, 75 sd = sysfs_get_dirent(dir_sd, grp->name);
75 strlen(grp->name)); 76 BUG_ON(!sd);
76 BUG_ON(IS_ERR(dir)); 77 } else
77 } 78 sd = sysfs_get(dir_sd);
78 else
79 dir = dget(kobj->dentry);
80 79
81 remove_files(dir,grp); 80 remove_files(sd, grp);
82 if (grp->name) 81 if (grp->name)
83 sysfs_remove_subdir(dir); 82 sysfs_remove_subdir(sd);
84 /* release the ref. taken in this routine */ 83
85 dput(dir); 84 sysfs_put(sd);
86} 85}
87 86
88 87
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 5266eec15f6e..3756e152285a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,187 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
133 */ 133 */
134static struct lock_class_key sysfs_inode_imutex_key; 134static struct lock_class_key sysfs_inode_imutex_key;
135 135
136struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) 136void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
137{ 137{
138 struct inode * inode = new_inode(sysfs_sb); 138 inode->i_blocks = 0;
139 if (inode) { 139 inode->i_mapping->a_ops = &sysfs_aops;
140 inode->i_blocks = 0; 140 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
141 inode->i_mapping->a_ops = &sysfs_aops; 141 inode->i_op = &sysfs_inode_operations;
142 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 142 inode->i_ino = sd->s_ino;
143 inode->i_op = &sysfs_inode_operations; 143 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
144 inode->i_ino = sd->s_ino; 144
145 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 145 if (sd->s_iattr) {
146 146 /* sysfs_dirent has non-default attributes
147 if (sd->s_iattr) { 147 * get them for the new inode from persistent copy
148 /* sysfs_dirent has non-default attributes 148 * in sysfs_dirent
149 * get them for the new inode from persistent copy 149 */
150 * in sysfs_dirent 150 set_inode_attr(inode, sd->s_iattr);
151 */
152 set_inode_attr(inode, sd->s_iattr);
153 } else
154 set_default_inode_attr(inode, mode);
155 }
156 return inode;
157}
158
159int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
160{
161 int error = 0;
162 struct inode * inode = NULL;
163 if (dentry) {
164 if (!dentry->d_inode) {
165 struct sysfs_dirent * sd = dentry->d_fsdata;
166 if ((inode = sysfs_new_inode(mode, sd))) {
167 if (dentry->d_parent && dentry->d_parent->d_inode) {
168 struct inode *p_inode = dentry->d_parent->d_inode;
169 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
170 }
171 goto Proceed;
172 }
173 else
174 error = -ENOMEM;
175 } else
176 error = -EEXIST;
177 } else
178 error = -ENOENT;
179 goto Done;
180
181 Proceed:
182 if (init)
183 error = init(inode);
184 if (!error) {
185 d_instantiate(dentry, inode);
186 if (S_ISDIR(mode))
187 dget(dentry); /* pin only directory dentry in core */
188 } else 151 } else
189 iput(inode); 152 set_default_inode_attr(inode, sd->s_mode);
190 Done:
191 return error;
192} 153}
193 154
194/* 155/**
195 * Get the name for corresponding element represented by the given sysfs_dirent 156 * sysfs_get_inode - get inode for sysfs_dirent
157 * @sd: sysfs_dirent to allocate inode for
158 *
159 * Get inode for @sd. If such inode doesn't exist, a new inode
160 * is allocated and basics are initialized. New inode is
161 * returned locked.
162 *
163 * LOCKING:
164 * Kernel thread context (may sleep).
165 *
166 * RETURNS:
167 * Pointer to allocated inode on success, NULL on failure.
196 */ 168 */
197const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) 169struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
198{ 170{
199 struct attribute * attr; 171 struct inode *inode;
200 struct bin_attribute * bin_attr;
201 struct sysfs_symlink * sl;
202
203 BUG_ON(!sd || !sd->s_element);
204
205 switch (sd->s_type) {
206 case SYSFS_DIR:
207 /* Always have a dentry so use that */
208 return sd->s_dentry->d_name.name;
209
210 case SYSFS_KOBJ_ATTR:
211 attr = sd->s_element;
212 return attr->name;
213
214 case SYSFS_KOBJ_BIN_ATTR:
215 bin_attr = sd->s_element;
216 return bin_attr->attr.name;
217 172
218 case SYSFS_KOBJ_LINK: 173 inode = iget_locked(sysfs_sb, sd->s_ino);
219 sl = sd->s_element; 174 if (inode && (inode->i_state & I_NEW))
220 return sl->link_name; 175 sysfs_init_inode(sd, inode);
221 }
222 return NULL;
223}
224 176
225static inline void orphan_all_buffers(struct inode *node) 177 return inode;
226{
227 struct sysfs_buffer_collection *set;
228 struct sysfs_buffer *buf;
229
230 mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
231 set = node->i_private;
232 if (set) {
233 list_for_each_entry(buf, &set->associates, associates) {
234 down(&buf->sem);
235 buf->orphaned = 1;
236 up(&buf->sem);
237 }
238 }
239 mutex_unlock(&node->i_mutex);
240} 178}
241 179
242 180/**
243/* 181 * sysfs_instantiate - instantiate dentry
244 * Unhashes the dentry corresponding to given sysfs_dirent 182 * @dentry: dentry to be instantiated
245 * Called with parent inode's i_mutex held. 183 * @inode: inode associated with @sd
184 *
185 * Unlock @inode if locked and instantiate @dentry with @inode.
186 *
187 * LOCKING:
188 * None.
246 */ 189 */
247void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) 190void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
248{ 191{
249 struct dentry *dentry = NULL; 192 BUG_ON(!dentry || dentry->d_inode);
250 struct inode *inode;
251 193
252 /* We're not holding a reference to ->s_dentry dentry but the 194 if (inode->i_state & I_NEW)
253 * field will stay valid as long as sysfs_lock is held. 195 unlock_new_inode(inode);
254 */
255 spin_lock(&sysfs_lock);
256 spin_lock(&dcache_lock);
257
258 /* dget dentry if it's still alive */
259 if (sd->s_dentry && sd->s_dentry->d_inode)
260 dentry = dget_locked(sd->s_dentry);
261
262 spin_unlock(&dcache_lock);
263 spin_unlock(&sysfs_lock);
264
265 /* drop dentry */
266 if (dentry) {
267 spin_lock(&dcache_lock);
268 spin_lock(&dentry->d_lock);
269 if (!d_unhashed(dentry) && dentry->d_inode) {
270 inode = dentry->d_inode;
271 spin_lock(&inode->i_lock);
272 __iget(inode);
273 spin_unlock(&inode->i_lock);
274 dget_locked(dentry);
275 __d_drop(dentry);
276 spin_unlock(&dentry->d_lock);
277 spin_unlock(&dcache_lock);
278 simple_unlink(parent->d_inode, dentry);
279 orphan_all_buffers(inode);
280 iput(inode);
281 } else {
282 spin_unlock(&dentry->d_lock);
283 spin_unlock(&dcache_lock);
284 }
285 196
286 dput(dentry); 197 d_instantiate(dentry, inode);
287 }
288} 198}
289 199
290int sysfs_hash_and_remove(struct dentry * dir, const char * name) 200int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
291{ 201{
292 struct sysfs_dirent * sd; 202 struct sysfs_addrm_cxt acxt;
293 struct sysfs_dirent * parent_sd; 203 struct sysfs_dirent **pos, *sd;
294 int found = 0;
295 204
296 if (!dir) 205 if (!dir_sd)
297 return -ENOENT; 206 return -ENOENT;
298 207
299 if (dir->d_inode == NULL) 208 sysfs_addrm_start(&acxt, dir_sd);
300 /* no inode means this hasn't been made visible yet */ 209
301 return -ENOENT; 210 for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
211 sd = *pos;
302 212
303 parent_sd = dir->d_fsdata; 213 if (!sysfs_type(sd))
304 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
305 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
306 if (!sd->s_element)
307 continue; 214 continue;
308 if (!strcmp(sysfs_get_name(sd), name)) { 215 if (!strcmp(sd->s_name, name)) {
309 list_del_init(&sd->s_sibling); 216 *pos = sd->s_sibling;
310 sysfs_drop_dentry(sd, dir); 217 sd->s_sibling = NULL;
311 sysfs_put(sd); 218 sysfs_remove_one(&acxt, sd);
312 found = 1;
313 break; 219 break;
314 } 220 }
315 } 221 }
316 mutex_unlock(&dir->d_inode->i_mutex);
317 222
318 return found ? 0 : -ENOENT; 223 if (sysfs_addrm_finish(&acxt))
224 return 0;
225 return -ENOENT;
319} 226}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 00ab9125d398..402cc356203c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,28 +19,18 @@ struct vfsmount *sysfs_mount;
19struct super_block * sysfs_sb = NULL; 19struct super_block * sysfs_sb = NULL;
20struct kmem_cache *sysfs_dir_cachep; 20struct kmem_cache *sysfs_dir_cachep;
21 21
22static void sysfs_clear_inode(struct inode *inode);
23
24static const struct super_operations sysfs_ops = { 22static const struct super_operations sysfs_ops = {
25 .statfs = simple_statfs, 23 .statfs = simple_statfs,
26 .drop_inode = sysfs_delete_inode, 24 .drop_inode = sysfs_delete_inode,
27 .clear_inode = sysfs_clear_inode,
28}; 25};
29 26
30static struct sysfs_dirent sysfs_root = { 27struct sysfs_dirent sysfs_root = {
31 .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), 28 .s_count = ATOMIC_INIT(1),
32 .s_children = LIST_HEAD_INIT(sysfs_root.s_children), 29 .s_flags = SYSFS_ROOT,
33 .s_element = NULL, 30 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
34 .s_type = SYSFS_ROOT,
35 .s_iattr = NULL,
36 .s_ino = 1, 31 .s_ino = 1,
37}; 32};
38 33
39static void sysfs_clear_inode(struct inode *inode)
40{
41 kfree(inode->i_private);
42}
43
44static int sysfs_fill_super(struct super_block *sb, void *data, int silent) 34static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
45{ 35{
46 struct inode *inode; 36 struct inode *inode;
@@ -53,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
53 sb->s_time_gran = 1; 43 sb->s_time_gran = 1;
54 sysfs_sb = sb; 44 sysfs_sb = sb;
55 45
56 inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 46 inode = new_inode(sysfs_sb);
57 &sysfs_root); 47 if (!inode) {
58 if (inode) {
59 inode->i_op = &sysfs_dir_inode_operations;
60 inode->i_fop = &sysfs_dir_operations;
61 /* directory inodes start off with i_nlink == 2 (for "." entry) */
62 inc_nlink(inode);
63 } else {
64 pr_debug("sysfs: could not get root inode\n"); 48 pr_debug("sysfs: could not get root inode\n");
65 return -ENOMEM; 49 return -ENOMEM;
66 } 50 }
67 51
52 sysfs_init_inode(&sysfs_root, inode);
53
54 inode->i_op = &sysfs_dir_inode_operations;
55 inode->i_fop = &sysfs_dir_operations;
56 /* directory inodes start off with i_nlink == 2 (for "." entry) */
57 inc_nlink(inode);
58
68 root = d_alloc_root(inode); 59 root = d_alloc_root(inode);
69 if (!root) { 60 if (!root) {
70 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 61 pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
71 iput(inode); 62 iput(inode);
72 return -ENOMEM; 63 return -ENOMEM;
73 } 64 }
65 sysfs_root.s_dentry = root;
74 root->d_fsdata = &sysfs_root; 66 root->d_fsdata = &sysfs_root;
75 sb->s_root = root; 67 sb->s_root = root;
76 return 0; 68 return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde920..2f86e0422290 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
11 11
12#include "sysfs.h" 12#include "sysfs.h"
13 13
14static int object_depth(struct kobject * kobj) 14static int object_depth(struct sysfs_dirent *sd)
15{ 15{
16 struct kobject * p = kobj;
17 int depth = 0; 16 int depth = 0;
18 do { depth++; } while ((p = p->parent)); 17
18 for (; sd->s_parent; sd = sd->s_parent)
19 depth++;
20
19 return depth; 21 return depth;
20} 22}
21 23
22static int object_path_length(struct kobject * kobj) 24static int object_path_length(struct sysfs_dirent * sd)
23{ 25{
24 struct kobject * p = kobj;
25 int length = 1; 26 int length = 1;
26 do { 27
27 length += strlen(kobject_name(p)) + 1; 28 for (; sd->s_parent; sd = sd->s_parent)
28 p = p->parent; 29 length += strlen(sd->s_name) + 1;
29 } while (p); 30
30 return length; 31 return length;
31} 32}
32 33
33static void fill_object_path(struct kobject * kobj, char * buffer, int length) 34static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
34{ 35{
35 struct kobject * p;
36
37 --length; 36 --length;
38 for (p = kobj; p; p = p->parent) { 37 for (; sd->s_parent; sd = sd->s_parent) {
39 int cur = strlen(kobject_name(p)); 38 int cur = strlen(sd->s_name);
40 39
41 /* back up enough to print this bus id with '/' */ 40 /* back up enough to print this bus id with '/' */
42 length -= cur; 41 length -= cur;
43 strncpy(buffer + length,kobject_name(p),cur); 42 strncpy(buffer + length, sd->s_name, cur);
44 *(buffer + --length) = '/'; 43 *(buffer + --length) = '/';
45 } 44 }
46} 45}
47 46
48static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
49{
50 struct sysfs_dirent * parent_sd = parent->d_fsdata;
51 struct sysfs_symlink * sl;
52 int error = 0;
53
54 error = -ENOMEM;
55 sl = kmalloc(sizeof(*sl), GFP_KERNEL);
56 if (!sl)
57 goto exit1;
58
59 sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
60 if (!sl->link_name)
61 goto exit2;
62
63 strcpy(sl->link_name, name);
64 sl->target_kobj = kobject_get(target);
65
66 error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
67 SYSFS_KOBJ_LINK);
68 if (!error)
69 return 0;
70
71 kobject_put(target);
72 kfree(sl->link_name);
73exit2:
74 kfree(sl);
75exit1:
76 return error;
77}
78
79/** 47/**
80 * sysfs_create_link - create symlink between two objects. 48 * sysfs_create_link - create symlink between two objects.
81 * @kobj: object whose directory we're creating the link in. 49 * @kobj: object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
84 */ 52 */
85int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) 53int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
86{ 54{
87 struct dentry *dentry = NULL; 55 struct sysfs_dirent *parent_sd = NULL;
88 int error = -EEXIST; 56 struct sysfs_dirent *target_sd = NULL;
57 struct sysfs_dirent *sd = NULL;
58 struct sysfs_addrm_cxt acxt;
59 int error;
89 60
90 BUG_ON(!name); 61 BUG_ON(!name);
91 62
92 if (!kobj) { 63 if (!kobj) {
93 if (sysfs_mount && sysfs_mount->mnt_sb) 64 if (sysfs_mount && sysfs_mount->mnt_sb)
94 dentry = sysfs_mount->mnt_sb->s_root; 65 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
95 } else 66 } else
96 dentry = kobj->dentry; 67 parent_sd = kobj->sd;
68
69 error = -EFAULT;
70 if (!parent_sd)
71 goto out_put;
72
73 /* target->sd can go away beneath us but is protected with
74 * sysfs_assoc_lock. Fetch target_sd from it.
75 */
76 spin_lock(&sysfs_assoc_lock);
77 if (target->sd)
78 target_sd = sysfs_get(target->sd);
79 spin_unlock(&sysfs_assoc_lock);
80
81 error = -ENOENT;
82 if (!target_sd)
83 goto out_put;
84
85 error = -ENOMEM;
86 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
87 if (!sd)
88 goto out_put;
89 sd->s_elem.symlink.target_sd = target_sd;
97 90
98 if (!dentry) 91 sysfs_addrm_start(&acxt, parent_sd);
99 return -EFAULT;
100 92
101 mutex_lock(&dentry->d_inode->i_mutex); 93 if (!sysfs_find_dirent(parent_sd, name)) {
102 if (!sysfs_dirent_exist(dentry->d_fsdata, name)) 94 sysfs_add_one(&acxt, sd);
103 error = sysfs_add_link(dentry, name, target); 95 sysfs_link_sibling(sd);
104 mutex_unlock(&dentry->d_inode->i_mutex); 96 }
97
98 if (sysfs_addrm_finish(&acxt))
99 return 0;
100
101 error = -EEXIST;
102 /* fall through */
103 out_put:
104 sysfs_put(target_sd);
105 sysfs_put(sd);
105 return error; 106 return error;
106} 107}
107 108
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
114 115
115void sysfs_remove_link(struct kobject * kobj, const char * name) 116void sysfs_remove_link(struct kobject * kobj, const char * name)
116{ 117{
117 sysfs_hash_and_remove(kobj->dentry,name); 118 sysfs_hash_and_remove(kobj->sd, name);
118} 119}
119 120
120static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, 121static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
121 char *path) 122 struct sysfs_dirent * target_sd, char *path)
122{ 123{
123 char * s; 124 char * s;
124 int depth, size; 125 int depth, size;
125 126
126 depth = object_depth(kobj); 127 depth = object_depth(parent_sd);
127 size = object_path_length(target) + depth * 3 - 1; 128 size = object_path_length(target_sd) + depth * 3 - 1;
128 if (size > PATH_MAX) 129 if (size > PATH_MAX)
129 return -ENAMETOOLONG; 130 return -ENAMETOOLONG;
130 131
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
133 for (s = path; depth--; s += 3) 134 for (s = path; depth--; s += 3)
134 strcpy(s,"../"); 135 strcpy(s,"../");
135 136
136 fill_object_path(target, path, size); 137 fill_object_path(target_sd, path, size);
137 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 138 pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
138 139
139 return 0; 140 return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
141 142
142static int sysfs_getlink(struct dentry *dentry, char * path) 143static int sysfs_getlink(struct dentry *dentry, char * path)
143{ 144{
144 struct kobject *kobj, *target_kobj; 145 struct sysfs_dirent *sd = dentry->d_fsdata;
145 int error = 0; 146 struct sysfs_dirent *parent_sd = sd->s_parent;
147 struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
148 int error;
146 149
147 kobj = sysfs_get_kobject(dentry->d_parent); 150 mutex_lock(&sysfs_mutex);
148 if (!kobj) 151 error = sysfs_get_target_path(parent_sd, target_sd, path);
149 return -EINVAL; 152 mutex_unlock(&sysfs_mutex);
150 153
151 target_kobj = sysfs_get_kobject(dentry);
152 if (!target_kobj) {
153 kobject_put(kobj);
154 return -EINVAL;
155 }
156
157 down_read(&sysfs_rename_sem);
158 error = sysfs_get_target_path(kobj, target_kobj, path);
159 up_read(&sysfs_rename_sem);
160
161 kobject_put(kobj);
162 kobject_put(target_kobj);
163 return error; 154 return error;
164
165} 155}
166 156
167static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) 157static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 502c949c402d..6a37f2386a8d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,9 +1,40 @@
1struct sysfs_elem_dir {
2 struct kobject * kobj;
3};
4
5struct sysfs_elem_symlink {
6 struct sysfs_dirent * target_sd;
7};
8
9struct sysfs_elem_attr {
10 struct attribute * attr;
11};
12
13struct sysfs_elem_bin_attr {
14 struct bin_attribute * bin_attr;
15};
16
17/*
18 * As long as s_count reference is held, the sysfs_dirent itself is
19 * accessible. Dereferencing s_elem or any other outer entity
20 * requires s_active reference.
21 */
1struct sysfs_dirent { 22struct sysfs_dirent {
2 atomic_t s_count; 23 atomic_t s_count;
3 struct list_head s_sibling; 24 atomic_t s_active;
4 struct list_head s_children; 25 struct sysfs_dirent * s_parent;
5 void * s_element; 26 struct sysfs_dirent * s_sibling;
6 int s_type; 27 struct sysfs_dirent * s_children;
28 const char * s_name;
29
30 union {
31 struct sysfs_elem_dir dir;
32 struct sysfs_elem_symlink symlink;
33 struct sysfs_elem_attr attr;
34 struct sysfs_elem_bin_attr bin_attr;
35 } s_elem;
36
37 unsigned int s_flags;
7 umode_t s_mode; 38 umode_t s_mode;
8 ino_t s_ino; 39 ino_t s_ino;
9 struct dentry * s_dentry; 40 struct dentry * s_dentry;
@@ -11,30 +42,60 @@ struct sysfs_dirent {
11 atomic_t s_event; 42 atomic_t s_event;
12}; 43};
13 44
45#define SD_DEACTIVATED_BIAS INT_MIN
46
47struct sysfs_addrm_cxt {
48 struct sysfs_dirent *parent_sd;
49 struct inode *parent_inode;
50 struct sysfs_dirent *removed;
51 int cnt;
52};
53
14extern struct vfsmount * sysfs_mount; 54extern struct vfsmount * sysfs_mount;
55extern struct sysfs_dirent sysfs_root;
15extern struct kmem_cache *sysfs_dir_cachep; 56extern struct kmem_cache *sysfs_dir_cachep;
16 57
17extern void sysfs_delete_inode(struct inode *inode); 58extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
18extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); 59extern void sysfs_link_sibling(struct sysfs_dirent *sd);
19extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 60extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
61extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
62extern void sysfs_put_active(struct sysfs_dirent *sd);
63extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
64extern void sysfs_put_active_two(struct sysfs_dirent *sd);
65extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
66 struct sysfs_dirent *parent_sd);
67extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
68 struct sysfs_dirent *sd);
69extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
70 struct sysfs_dirent *sd);
71extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
20 72
21extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); 73extern void sysfs_delete_inode(struct inode *inode);
22extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, 74extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
23 umode_t, int); 75extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
24 76extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
25extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 77
26extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); 78extern void release_sysfs_dirent(struct sysfs_dirent * sd);
79extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
80 const unsigned char *name);
81extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
82 const unsigned char *name);
83extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
84 int type);
85
86extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
87 const struct attribute *attr, int type);
88extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
27extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); 89extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
28 90
29extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 91extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
30extern void sysfs_remove_subdir(struct dentry *); 92 struct sysfs_dirent **p_sd);
93extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
31 94
32extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
33extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
34extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 95extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
35 96
36extern spinlock_t sysfs_lock; 97extern spinlock_t sysfs_assoc_lock;
37extern struct rw_semaphore sysfs_rename_sem; 98extern struct mutex sysfs_mutex;
38extern struct super_block * sysfs_sb; 99extern struct super_block * sysfs_sb;
39extern const struct file_operations sysfs_dir_operations; 100extern const struct file_operations sysfs_dir_operations;
40extern const struct file_operations sysfs_file_operations; 101extern const struct file_operations sysfs_file_operations;
@@ -42,73 +103,9 @@ extern const struct file_operations bin_fops;
42extern const struct inode_operations sysfs_dir_inode_operations; 103extern const struct inode_operations sysfs_dir_inode_operations;
43extern const struct inode_operations sysfs_symlink_inode_operations; 104extern const struct inode_operations sysfs_symlink_inode_operations;
44 105
45struct sysfs_symlink { 106static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
46 char * link_name;
47 struct kobject * target_kobj;
48};
49
50struct sysfs_buffer {
51 struct list_head associates;
52 size_t count;
53 loff_t pos;
54 char * page;
55 struct sysfs_ops * ops;
56 struct semaphore sem;
57 int orphaned;
58 int needs_read_fill;
59 int event;
60};
61
62struct sysfs_buffer_collection {
63 struct list_head associates;
64};
65
66static inline struct kobject * to_kobj(struct dentry * dentry)
67{
68 struct sysfs_dirent * sd = dentry->d_fsdata;
69 return ((struct kobject *) sd->s_element);
70}
71
72static inline struct attribute * to_attr(struct dentry * dentry)
73{ 107{
74 struct sysfs_dirent * sd = dentry->d_fsdata; 108 return sd->s_flags & SYSFS_TYPE_MASK;
75 return ((struct attribute *) sd->s_element);
76}
77
78static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
79{
80 struct sysfs_dirent * sd = dentry->d_fsdata;
81 return ((struct bin_attribute *) sd->s_element);
82}
83
84static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
85{
86 struct kobject * kobj = NULL;
87
88 spin_lock(&dcache_lock);
89 if (!d_unhashed(dentry)) {
90 struct sysfs_dirent * sd = dentry->d_fsdata;
91 if (sd->s_type & SYSFS_KOBJ_LINK) {
92 struct sysfs_symlink * sl = sd->s_element;
93 kobj = kobject_get(sl->target_kobj);
94 } else
95 kobj = kobject_get(sd->s_element);
96 }
97 spin_unlock(&dcache_lock);
98
99 return kobj;
100}
101
102static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
103{
104 if (sd->s_type & SYSFS_KOBJ_LINK) {
105 struct sysfs_symlink * sl = sd->s_element;
106 kfree(sl->link_name);
107 kobject_put(sl->target_kobj);
108 kfree(sl);
109 }
110 kfree(sd->s_iattr);
111 kmem_cache_free(sysfs_dir_cachep, sd);
112} 109}
113 110
114static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) 111static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -122,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
122 119
123static inline void sysfs_put(struct sysfs_dirent * sd) 120static inline void sysfs_put(struct sysfs_dirent * sd)
124{ 121{
125 if (atomic_dec_and_test(&sd->s_count)) 122 if (sd && atomic_dec_and_test(&sd->s_count))
126 release_sysfs_dirent(sd); 123 release_sysfs_dirent(sd);
127} 124}
128 125