aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/aio.c4
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/compat.c61
-rw-r--r--fs/dcache.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c77
-rw-r--r--fs/gfs2/glock.c410
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c6
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/inode.c32
-rw-r--r--fs/internal.h13
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c3
-rw-r--r--fs/namei.c1501
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/super.c7
-rw-r--r--fs/open.c126
-rw-r--r--fs/partitions/osf.c2
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/xfs_mru_cache.c2
77 files changed, 2985 insertions, 2078 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf61631..51545529637 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
21#include <linux/posix_acl_xattr.h> 21#include <linux/posix_acl_xattr.h>
22#include "xattr.h" 22#include "xattr.h"
23#include "acl.h" 23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h"
26 26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 28{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
59 struct v9fs_session_info *v9ses; 59 struct v9fs_session_info *v9ses;
60 60
61 v9ses = v9fs_inode2v9ses(inode); 61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 62 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
63 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); 64 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); 65 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0; 66 return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) { 72 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); 73 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); 74 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else 75 } else
77 retval = -EIO; 76 retval = -EIO;
78 77
78 if (!IS_ERR(dacl))
79 posix_acl_release(dacl);
80
81 if (!IS_ERR(pacl))
82 posix_acl_release(pacl);
83
79 return retval; 84 return retval;
80} 85}
81 86
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
100 return -ECHILD; 105 return -ECHILD;
101 106
102 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 108 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
109 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
104 /* 110 /*
105 * On access = client mode get the acl 111 * On access = client and acl = on mode get the acl
106 * values from the server 112 * values from the server
107 */ 113 */
108 return 0; 114 return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
128 struct inode *inode = dentry->d_inode; 134 struct inode *inode = dentry->d_inode;
129 135
130 set_cached_acl(inode, type, acl); 136 set_cached_acl(inode, type, acl);
137
138 if (!acl)
139 return 0;
140
131 /* Set a setxattr request to server */ 141 /* Set a setxattr request to server */
132 size = posix_acl_xattr_size(acl->a_count); 142 size = posix_acl_xattr_size(acl->a_count);
133 buffer = kmalloc(size, GFP_KERNEL); 143 buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
177int v9fs_set_create_acl(struct dentry *dentry, 187int v9fs_set_create_acl(struct dentry *dentry,
178 struct posix_acl *dpacl, struct posix_acl *pacl) 188 struct posix_acl *dpacl, struct posix_acl *pacl)
179{ 189{
180 if (dpacl) 190 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
181 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); 191 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
182 if (pacl)
183 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
184 posix_acl_release(dpacl); 192 posix_acl_release(dpacl);
185 posix_acl_release(pacl); 193 posix_acl_release(pacl);
186 return 0; 194 return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac..5b335c5086a 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
33 33
34#define CACHETAG_LEN 11 34#define CACHETAG_LEN 11
35 35
36struct kmem_cache *vcookie_cache;
37
38struct fscache_netfs v9fs_cache_netfs = { 36struct fscache_netfs v9fs_cache_netfs = {
39 .name = "9p", 37 .name = "9p",
40 .version = 0, 38 .version = 0,
41}; 39};
42 40
43static void init_once(void *foo)
44{
45 struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
46 vcookie->fscache = NULL;
47 vcookie->qid = NULL;
48 inode_init_once(&vcookie->inode);
49}
50
51/**
52 * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
53 * vcookie to inode mapping
54 *
55 * Returns 0 on success.
56 */
57
58static int v9fs_init_vcookiecache(void)
59{
60 vcookie_cache = kmem_cache_create("vcookie_cache",
61 sizeof(struct v9fs_cookie),
62 0, (SLAB_RECLAIM_ACCOUNT|
63 SLAB_MEM_SPREAD),
64 init_once);
65 if (!vcookie_cache)
66 return -ENOMEM;
67
68 return 0;
69}
70
71/**
72 * v9fs_destroy_vcookiecache - destroy the cache of vcookies
73 *
74 */
75
76static void v9fs_destroy_vcookiecache(void)
77{
78 kmem_cache_destroy(vcookie_cache);
79}
80
81int __v9fs_cache_register(void)
82{
83 int ret;
84 ret = v9fs_init_vcookiecache();
85 if (ret < 0)
86 return ret;
87
88 return fscache_register_netfs(&v9fs_cache_netfs);
89}
90
91void __v9fs_cache_unregister(void)
92{
93 v9fs_destroy_vcookiecache();
94 fscache_unregister_netfs(&v9fs_cache_netfs);
95}
96
97/** 41/**
98 * v9fs_random_cachetag - Generate a random tag to be associated 42 * v9fs_random_cachetag - Generate a random tag to be associated
99 * with a new cache session. 43 * with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
133} 77}
134 78
135const struct fscache_cookie_def v9fs_cache_session_index_def = { 79const struct fscache_cookie_def v9fs_cache_session_index_def = {
136 .name = "9P.session", 80 .name = "9P.session",
137 .type = FSCACHE_COOKIE_TYPE_INDEX, 81 .type = FSCACHE_COOKIE_TYPE_INDEX,
138 .get_key = v9fs_cache_session_get_key, 82 .get_key = v9fs_cache_session_get_key,
139}; 83};
140 84
141void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) 85void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
163static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, 107static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
164 void *buffer, uint16_t bufmax) 108 void *buffer, uint16_t bufmax)
165{ 109{
166 const struct v9fs_cookie *vcookie = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
167 memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); 111 memcpy(buffer, &v9inode->fscache_key->path,
168 112 sizeof(v9inode->fscache_key->path));
169 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, 113 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
170 vcookie->qid->path); 114 v9inode->fscache_key->path);
171 return sizeof(vcookie->qid->path); 115 return sizeof(v9inode->fscache_key->path);
172} 116}
173 117
174static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, 118static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
175 uint64_t *size) 119 uint64_t *size)
176{ 120{
177 const struct v9fs_cookie *vcookie = cookie_netfs_data; 121 const struct v9fs_inode *v9inode = cookie_netfs_data;
178 *size = i_size_read(&vcookie->inode); 122 *size = i_size_read(&v9inode->vfs_inode);
179 123
180 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, 124 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
181 *size); 125 *size);
182} 126}
183 127
184static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, 128static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
185 void *buffer, uint16_t buflen) 129 void *buffer, uint16_t buflen)
186{ 130{
187 const struct v9fs_cookie *vcookie = cookie_netfs_data; 131 const struct v9fs_inode *v9inode = cookie_netfs_data;
188 memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); 132 memcpy(buffer, &v9inode->fscache_key->version,
189 133 sizeof(v9inode->fscache_key->version));
190 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, 134 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
191 vcookie->qid->version); 135 v9inode->fscache_key->version);
192 return sizeof(vcookie->qid->version); 136 return sizeof(v9inode->fscache_key->version);
193} 137}
194 138
195static enum 139static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
197 const void *buffer, 141 const void *buffer,
198 uint16_t buflen) 142 uint16_t buflen)
199{ 143{
200 const struct v9fs_cookie *vcookie = cookie_netfs_data; 144 const struct v9fs_inode *v9inode = cookie_netfs_data;
201 145
202 if (buflen != sizeof(vcookie->qid->version)) 146 if (buflen != sizeof(v9inode->fscache_key->version))
203 return FSCACHE_CHECKAUX_OBSOLETE; 147 return FSCACHE_CHECKAUX_OBSOLETE;
204 148
205 if (memcmp(buffer, &vcookie->qid->version, 149 if (memcmp(buffer, &v9inode->fscache_key->version,
206 sizeof(vcookie->qid->version))) 150 sizeof(v9inode->fscache_key->version)))
207 return FSCACHE_CHECKAUX_OBSOLETE; 151 return FSCACHE_CHECKAUX_OBSOLETE;
208 152
209 return FSCACHE_CHECKAUX_OKAY; 153 return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
211 155
212static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) 156static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
213{ 157{
214 struct v9fs_cookie *vcookie = cookie_netfs_data; 158 struct v9fs_inode *v9inode = cookie_netfs_data;
215 struct pagevec pvec; 159 struct pagevec pvec;
216 pgoff_t first; 160 pgoff_t first;
217 int loop, nr_pages; 161 int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
220 first = 0; 164 first = 0;
221 165
222 for (;;) { 166 for (;;) {
223 nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, 167 nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
224 first, 168 first,
225 PAGEVEC_SIZE - pagevec_count(&pvec)); 169 PAGEVEC_SIZE - pagevec_count(&pvec));
226 if (!nr_pages) 170 if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
249 193
250void v9fs_cache_inode_get_cookie(struct inode *inode) 194void v9fs_cache_inode_get_cookie(struct inode *inode)
251{ 195{
252 struct v9fs_cookie *vcookie; 196 struct v9fs_inode *v9inode;
253 struct v9fs_session_info *v9ses; 197 struct v9fs_session_info *v9ses;
254 198
255 if (!S_ISREG(inode->i_mode)) 199 if (!S_ISREG(inode->i_mode))
256 return; 200 return;
257 201
258 vcookie = v9fs_inode2cookie(inode); 202 v9inode = V9FS_I(inode);
259 if (vcookie->fscache) 203 if (v9inode->fscache)
260 return; 204 return;
261 205
262 v9ses = v9fs_inode2v9ses(inode); 206 v9ses = v9fs_inode2v9ses(inode);
263 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 207 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
264 &v9fs_cache_inode_index_def, 208 &v9fs_cache_inode_index_def,
265 vcookie); 209 v9inode);
266 210
267 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, 211 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
268 vcookie->fscache); 212 v9inode->fscache);
269} 213}
270 214
271void v9fs_cache_inode_put_cookie(struct inode *inode) 215void v9fs_cache_inode_put_cookie(struct inode *inode)
272{ 216{
273 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 217 struct v9fs_inode *v9inode = V9FS_I(inode);
274 218
275 if (!vcookie->fscache) 219 if (!v9inode->fscache)
276 return; 220 return;
277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, 221 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
278 vcookie->fscache); 222 v9inode->fscache);
279 223
280 fscache_relinquish_cookie(vcookie->fscache, 0); 224 fscache_relinquish_cookie(v9inode->fscache, 0);
281 vcookie->fscache = NULL; 225 v9inode->fscache = NULL;
282} 226}
283 227
284void v9fs_cache_inode_flush_cookie(struct inode *inode) 228void v9fs_cache_inode_flush_cookie(struct inode *inode)
285{ 229{
286 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 230 struct v9fs_inode *v9inode = V9FS_I(inode);
287 231
288 if (!vcookie->fscache) 232 if (!v9inode->fscache)
289 return; 233 return;
290 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, 234 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
291 vcookie->fscache); 235 v9inode->fscache);
292 236
293 fscache_relinquish_cookie(vcookie->fscache, 1); 237 fscache_relinquish_cookie(v9inode->fscache, 1);
294 vcookie->fscache = NULL; 238 v9inode->fscache = NULL;
295} 239}
296 240
297void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) 241void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
298{ 242{
299 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 243 struct v9fs_inode *v9inode = V9FS_I(inode);
300 struct p9_fid *fid; 244 struct p9_fid *fid;
301 245
302 if (!vcookie->fscache) 246 if (!v9inode->fscache)
303 return; 247 return;
304 248
305 spin_lock(&vcookie->lock); 249 spin_lock(&v9inode->fscache_lock);
306 fid = filp->private_data; 250 fid = filp->private_data;
307 if ((filp->f_flags & O_ACCMODE) != O_RDONLY) 251 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
308 v9fs_cache_inode_flush_cookie(inode); 252 v9fs_cache_inode_flush_cookie(inode);
309 else 253 else
310 v9fs_cache_inode_get_cookie(inode); 254 v9fs_cache_inode_get_cookie(inode);
311 255
312 spin_unlock(&vcookie->lock); 256 spin_unlock(&v9inode->fscache_lock);
313} 257}
314 258
315void v9fs_cache_inode_reset_cookie(struct inode *inode) 259void v9fs_cache_inode_reset_cookie(struct inode *inode)
316{ 260{
317 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 261 struct v9fs_inode *v9inode = V9FS_I(inode);
318 struct v9fs_session_info *v9ses; 262 struct v9fs_session_info *v9ses;
319 struct fscache_cookie *old; 263 struct fscache_cookie *old;
320 264
321 if (!vcookie->fscache) 265 if (!v9inode->fscache)
322 return; 266 return;
323 267
324 old = vcookie->fscache; 268 old = v9inode->fscache;
325 269
326 spin_lock(&vcookie->lock); 270 spin_lock(&v9inode->fscache_lock);
327 fscache_relinquish_cookie(vcookie->fscache, 1); 271 fscache_relinquish_cookie(v9inode->fscache, 1);
328 272
329 v9ses = v9fs_inode2v9ses(inode); 273 v9ses = v9fs_inode2v9ses(inode);
330 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 274 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
331 &v9fs_cache_inode_index_def, 275 &v9fs_cache_inode_index_def,
332 vcookie); 276 v9inode);
333
334 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", 277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
335 inode, old, vcookie->fscache); 278 inode, old, v9inode->fscache);
336 279
337 spin_unlock(&vcookie->lock); 280 spin_unlock(&v9inode->fscache_lock);
338} 281}
339 282
340int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) 283int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
341{ 284{
342 struct inode *inode = page->mapping->host; 285 struct inode *inode = page->mapping->host;
343 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 286 struct v9fs_inode *v9inode = V9FS_I(inode);
344 287
345 BUG_ON(!vcookie->fscache); 288 BUG_ON(!v9inode->fscache);
346 289
347 return fscache_maybe_release_page(vcookie->fscache, page, gfp); 290 return fscache_maybe_release_page(v9inode->fscache, page, gfp);
348} 291}
349 292
350void __v9fs_fscache_invalidate_page(struct page *page) 293void __v9fs_fscache_invalidate_page(struct page *page)
351{ 294{
352 struct inode *inode = page->mapping->host; 295 struct inode *inode = page->mapping->host;
353 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 296 struct v9fs_inode *v9inode = V9FS_I(inode);
354 297
355 BUG_ON(!vcookie->fscache); 298 BUG_ON(!v9inode->fscache);
356 299
357 if (PageFsCache(page)) { 300 if (PageFsCache(page)) {
358 fscache_wait_on_page_write(vcookie->fscache, page); 301 fscache_wait_on_page_write(v9inode->fscache, page);
359 BUG_ON(!PageLocked(page)); 302 BUG_ON(!PageLocked(page));
360 fscache_uncache_page(vcookie->fscache, page); 303 fscache_uncache_page(v9inode->fscache, page);
361 } 304 }
362} 305}
363 306
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
380int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) 323int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
381{ 324{
382 int ret; 325 int ret;
383 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 326 const struct v9fs_inode *v9inode = V9FS_I(inode);
384 327
385 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 328 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
386 if (!vcookie->fscache) 329 if (!v9inode->fscache)
387 return -ENOBUFS; 330 return -ENOBUFS;
388 331
389 ret = fscache_read_or_alloc_page(vcookie->fscache, 332 ret = fscache_read_or_alloc_page(v9inode->fscache,
390 page, 333 page,
391 v9fs_vfs_readpage_complete, 334 v9fs_vfs_readpage_complete,
392 NULL, 335 NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
418 unsigned *nr_pages) 361 unsigned *nr_pages)
419{ 362{
420 int ret; 363 int ret;
421 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 364 const struct v9fs_inode *v9inode = V9FS_I(inode);
422 365
423 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); 366 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
424 if (!vcookie->fscache) 367 if (!v9inode->fscache)
425 return -ENOBUFS; 368 return -ENOBUFS;
426 369
427 ret = fscache_read_or_alloc_pages(vcookie->fscache, 370 ret = fscache_read_or_alloc_pages(v9inode->fscache,
428 mapping, pages, nr_pages, 371 mapping, pages, nr_pages,
429 v9fs_vfs_readpage_complete, 372 v9fs_vfs_readpage_complete,
430 NULL, 373 NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
453void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) 396void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
454{ 397{
455 int ret; 398 int ret;
456 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 399 const struct v9fs_inode *v9inode = V9FS_I(inode);
457 400
458 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 401 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
459 ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); 402 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
460 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); 403 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
461 if (ret != 0) 404 if (ret != 0)
462 v9fs_uncache_page(inode, page); 405 v9fs_uncache_page(inode, page);
463} 406}
407
408/*
409 * wait for a page to complete writing to the cache
410 */
411void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
412{
413 const struct v9fs_inode *v9inode = V9FS_I(inode);
414 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
415 if (PageFsCache(page))
416 fscache_wait_on_page_write(v9inode->fscache, page);
417}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee..049507a5b01 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
25#include <linux/fscache.h> 25#include <linux/fscache.h>
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27 27
28extern struct kmem_cache *vcookie_cache;
29
30struct v9fs_cookie {
31 spinlock_t lock;
32 struct inode inode;
33 struct fscache_cookie *fscache;
34 struct p9_qid *qid;
35};
36
37static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
38{
39 return container_of(inode, struct v9fs_cookie, inode);
40}
41
42extern struct fscache_netfs v9fs_cache_netfs; 28extern struct fscache_netfs v9fs_cache_netfs;
43extern const struct fscache_cookie_def v9fs_cache_session_index_def; 29extern const struct fscache_cookie_def v9fs_cache_session_index_def;
44extern const struct fscache_cookie_def v9fs_cache_inode_index_def; 30extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
64 struct list_head *pages, 50 struct list_head *pages,
65 unsigned *nr_pages); 51 unsigned *nr_pages);
66extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); 52extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
67 53extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
68 54 struct page *page);
69/**
70 * v9fs_cache_register - Register v9fs file system with the cache
71 */
72static inline int v9fs_cache_register(void)
73{
74 return __v9fs_cache_register();
75}
76
77/**
78 * v9fs_cache_unregister - Unregister v9fs from the cache
79 */
80static inline void v9fs_cache_unregister(void)
81{
82 __v9fs_cache_unregister();
83}
84 55
85static inline int v9fs_fscache_release_page(struct page *page, 56static inline int v9fs_fscache_release_page(struct page *page,
86 gfp_t gfp) 57 gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
117 88
118static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 89static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
119{ 90{
120 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 91 struct v9fs_inode *v9inode = V9FS_I(inode);
121 fscache_uncache_page(vcookie->fscache, page); 92 fscache_uncache_page(v9inode->fscache, page);
122 BUG_ON(PageFsCache(page)); 93 BUG_ON(PageFsCache(page));
123} 94}
124 95
125static inline void v9fs_vcookie_set_qid(struct inode *inode, 96static inline void v9fs_fscache_set_key(struct inode *inode,
126 struct p9_qid *qid) 97 struct p9_qid *qid)
127{ 98{
128 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 99 struct v9fs_inode *v9inode = V9FS_I(inode);
129 spin_lock(&vcookie->lock); 100 spin_lock(&v9inode->fscache_lock);
130 vcookie->qid = qid; 101 v9inode->fscache_key = qid;
131 spin_unlock(&vcookie->lock); 102 spin_unlock(&v9inode->fscache_lock);
132} 103}
133 104
134#else /* CONFIG_9P_FSCACHE */ 105static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
135 106 struct page *page)
136static inline int v9fs_cache_register(void)
137{ 107{
138 return 1; 108 return __v9fs_fscache_wait_on_page_write(inode, page);
139} 109}
140 110
141static inline void v9fs_cache_unregister(void) {} 111#else /* CONFIG_9P_FSCACHE */
142 112
143static inline int v9fs_fscache_release_page(struct page *page, 113static inline int v9fs_fscache_release_page(struct page *page,
144 gfp_t gfp) { 114 gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
168static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 138static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
169{} 139{}
170 140
171static inline void v9fs_vcookie_set_qid(struct inode *inode, 141static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
172 struct p9_qid *qid) 142 struct page *page)
173{} 143{
144 return;
145}
174 146
175#endif /* CONFIG_9P_FSCACHE */ 147#endif /* CONFIG_9P_FSCACHE */
176#endif /* _9P_CACHE_H */ 148#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d7..cd63e002d82 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
125 return -ENOMEM; 125 return -ENOMEM;
126} 126}
127 127
128/** 128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 * v9fs_fid_lookup - lookup for a fid, try to walk if not found 129 uid_t uid, int any)
130 * @dentry: dentry to look for fid in
131 *
132 * Look for a fid in the specified dentry for the current user.
133 * If no fid is found, try to create one walking from a fid from the parent
134 * dentry (if it has one), or the root dentry. If the user haven't accessed
135 * the fs yet, attach now and walk from the root.
136 */
137
138struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
139{ 130{
140 int i, n, l, clone, any, access;
141 u32 uid;
142 struct p9_fid *fid, *old_fid = NULL;
143 struct dentry *ds; 131 struct dentry *ds;
144 struct v9fs_session_info *v9ses;
145 char **wnames, *uname; 132 char **wnames, *uname;
133 int i, n, l, clone, access;
134 struct v9fs_session_info *v9ses;
135 struct p9_fid *fid, *old_fid = NULL;
146 136
147 v9ses = v9fs_inode2v9ses(dentry->d_inode); 137 v9ses = v9fs_inode2v9ses(dentry->d_inode);
148 access = v9ses->flags & V9FS_ACCESS_MASK; 138 access = v9ses->flags & V9FS_ACCESS_MASK;
149 switch (access) {
150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
153 uid = current_fsuid();
154 any = 0;
155 break;
156
157 case V9FS_ACCESS_ANY:
158 uid = v9ses->uid;
159 any = 1;
160 break;
161
162 default:
163 uid = ~0;
164 any = 0;
165 break;
166 }
167
168 fid = v9fs_fid_find(dentry, uid, any); 139 fid = v9fs_fid_find(dentry, uid, any);
169 if (fid) 140 if (fid)
170 return fid; 141 return fid;
@@ -250,6 +221,45 @@ err_out:
250 return fid; 221 return fid;
251} 222}
252 223
224/**
225 * v9fs_fid_lookup - lookup for a fid, try to walk if not found
226 * @dentry: dentry to look for fid in
227 *
228 * Look for a fid in the specified dentry for the current user.
229 * If no fid is found, try to create one walking from a fid from the parent
230 * dentry (if it has one), or the root dentry. If the user haven't accessed
231 * the fs yet, attach now and walk from the root.
232 */
233
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{
236 uid_t uid;
237 int any, access;
238 struct v9fs_session_info *v9ses;
239
240 v9ses = v9fs_inode2v9ses(dentry->d_inode);
241 access = v9ses->flags & V9FS_ACCESS_MASK;
242 switch (access) {
243 case V9FS_ACCESS_SINGLE:
244 case V9FS_ACCESS_USER:
245 case V9FS_ACCESS_CLIENT:
246 uid = current_fsuid();
247 any = 0;
248 break;
249
250 case V9FS_ACCESS_ANY:
251 uid = v9ses->uid;
252 any = 1;
253 break;
254
255 default:
256 uid = ~0;
257 any = 0;
258 break;
259 }
260 return v9fs_fid_lookup_with_uid(dentry, uid, any);
261}
262
253struct p9_fid *v9fs_fid_clone(struct dentry *dentry) 263struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
254{ 264{
255 struct p9_fid *fid, *ret; 265 struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
261 ret = p9_client_walk(fid, 0, NULL, 1); 271 ret = p9_client_walk(fid, 0, NULL, 1);
262 return ret; 272 return ret;
263} 273}
274
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
276{
277 struct p9_fid *fid, *ret;
278
279 fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
280 if (IS_ERR(fid))
281 return fid;
282
283 ret = p9_client_walk(fid, 0, NULL, 1);
284 return ret;
285}
286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{
289 int err;
290 struct p9_fid *fid;
291
292 fid = v9fs_fid_clone_with_uid(dentry, 0);
293 if (IS_ERR(fid))
294 goto error_out;
295 /*
296 * writeback fid will only be used to write back the
297 * dirty pages. We always request for the open fid in read-write
298 * mode so that a partial page write which result in page
299 * read can work.
300 */
301 err = p9_client_open(fid, O_RDWR);
302 if (err < 0) {
303 p9_client_clunk(fid);
304 fid = ERR_PTR(err);
305 goto error_out;
306 }
307error_out:
308 return fid;
309}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996..bb0b6e7f58f 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
19 * Boston, MA 02111-1301 USA 19 * Boston, MA 02111-1301 USA
20 * 20 *
21 */ 21 */
22 22#ifndef FS_9P_FID_H
23#define FS_9P_FID_H
23#include <linux/list.h> 24#include <linux/list.h>
24 25
25/** 26/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
45struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); 46struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
46struct p9_fid *v9fs_fid_clone(struct dentry *dentry); 47struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
47int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); 48int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
49struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
50#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba8..c82b017f51f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
39 39
40static DEFINE_SPINLOCK(v9fs_sessionlist_lock); 40static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
41static LIST_HEAD(v9fs_sessionlist); 41static LIST_HEAD(v9fs_sessionlist);
42struct kmem_cache *v9fs_inode_cache;
42 43
43/* 44/*
44 * Option Parsing (code inspired by NFS code) 45 * Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
55 /* Cache options */ 56 /* Cache options */
56 Opt_cache_loose, Opt_fscache, 57 Opt_cache_loose, Opt_fscache,
57 /* Access options */ 58 /* Access options */
58 Opt_access, 59 Opt_access, Opt_posixacl,
59 /* Error token */ 60 /* Error token */
60 Opt_err 61 Opt_err
61}; 62};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
73 {Opt_fscache, "fscache"}, 74 {Opt_fscache, "fscache"},
74 {Opt_cachetag, "cachetag=%s"}, 75 {Opt_cachetag, "cachetag=%s"},
75 {Opt_access, "access=%s"}, 76 {Opt_access, "access=%s"},
77 {Opt_posixacl, "posixacl"},
76 {Opt_err, NULL} 78 {Opt_err, NULL}
77}; 79};
78 80
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 else if (strcmp(s, "any") == 0) 196 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 197 v9ses->flags |= V9FS_ACCESS_ANY;
196 else if (strcmp(s, "client") == 0) { 198 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT; 199 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else { 200 } else {
207 v9ses->flags |= V9FS_ACCESS_SINGLE; 201 v9ses->flags |= V9FS_ACCESS_SINGLE;
208 v9ses->uid = simple_strtoul(s, &e, 10); 202 v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
212 kfree(s); 206 kfree(s);
213 break; 207 break;
214 208
209 case Opt_posixacl:
210#ifdef CONFIG_9P_FS_POSIX_ACL
211 v9ses->flags |= V9FS_POSIX_ACL;
212#else
213 P9_DPRINTK(P9_DEBUG_ERROR,
214 "Not defined CONFIG_9P_FS_POSIX_ACL. "
215 "Ignoring posixacl option\n");
216#endif
217 break;
218
215 default: 219 default:
216 continue; 220 continue;
217 } 221 }
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
260 list_add(&v9ses->slist, &v9fs_sessionlist); 264 list_add(&v9ses->slist, &v9fs_sessionlist);
261 spin_unlock(&v9fs_sessionlist_lock); 265 spin_unlock(&v9fs_sessionlist_lock);
262 266
263 v9ses->flags = V9FS_ACCESS_USER;
264 strcpy(v9ses->uname, V9FS_DEFUSER); 267 strcpy(v9ses->uname, V9FS_DEFUSER);
265 strcpy(v9ses->aname, V9FS_DEFANAME); 268 strcpy(v9ses->aname, V9FS_DEFANAME);
266 v9ses->uid = ~0; 269 v9ses->uid = ~0;
267 v9ses->dfltuid = V9FS_DEFUID; 270 v9ses->dfltuid = V9FS_DEFUID;
268 v9ses->dfltgid = V9FS_DEFGID; 271 v9ses->dfltgid = V9FS_DEFGID;
269 272
270 rc = v9fs_parse_options(v9ses, data);
271 if (rc < 0) {
272 retval = rc;
273 goto error;
274 }
275
276 v9ses->clnt = p9_client_create(dev_name, data); 273 v9ses->clnt = p9_client_create(dev_name, data);
277 if (IS_ERR(v9ses->clnt)) { 274 if (IS_ERR(v9ses->clnt)) {
278 retval = PTR_ERR(v9ses->clnt); 275 retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
281 goto error; 278 goto error;
282 } 279 }
283 280
284 if (p9_is_proto_dotl(v9ses->clnt)) 281 v9ses->flags = V9FS_ACCESS_USER;
282
283 if (p9_is_proto_dotl(v9ses->clnt)) {
284 v9ses->flags = V9FS_ACCESS_CLIENT;
285 v9ses->flags |= V9FS_PROTO_2000L; 285 v9ses->flags |= V9FS_PROTO_2000L;
286 else if (p9_is_proto_dotu(v9ses->clnt)) 286 } else if (p9_is_proto_dotu(v9ses->clnt)) {
287 v9ses->flags |= V9FS_PROTO_2000U; 287 v9ses->flags |= V9FS_PROTO_2000U;
288 }
289
290 rc = v9fs_parse_options(v9ses, data);
291 if (rc < 0) {
292 retval = rc;
293 goto error;
294 }
288 295
289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 296 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
290 297
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
306 v9ses->flags |= V9FS_ACCESS_ANY; 313 v9ses->flags |= V9FS_ACCESS_ANY;
307 v9ses->uid = ~0; 314 v9ses->uid = ~0;
308 } 315 }
316 if (!v9fs_proto_dotl(v9ses) ||
317 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
318 /*
319 * We support ACL checks on clinet only if the protocol is
320 * 9P2000.L and access is V9FS_ACCESS_CLIENT.
321 */
322 v9ses->flags &= ~V9FS_ACL_MASK;
323 }
309 324
310 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 325 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
311 v9ses->aname); 326 v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
467 kobject_put(v9fs_kobj); 482 kobject_put(v9fs_kobj);
468} 483}
469 484
485static void v9fs_inode_init_once(void *foo)
486{
487 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
488#ifdef CONFIG_9P_FSCACHE
489 v9inode->fscache = NULL;
490 v9inode->fscache_key = NULL;
491#endif
492 inode_init_once(&v9inode->vfs_inode);
493}
494
495/**
496 * v9fs_init_inode_cache - initialize a cache for 9P
497 * Returns 0 on success.
498 */
499static int v9fs_init_inode_cache(void)
500{
501 v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
502 sizeof(struct v9fs_inode),
503 0, (SLAB_RECLAIM_ACCOUNT|
504 SLAB_MEM_SPREAD),
505 v9fs_inode_init_once);
506 if (!v9fs_inode_cache)
507 return -ENOMEM;
508
509 return 0;
510}
511
512/**
513 * v9fs_destroy_inode_cache - destroy the cache of 9P inode
514 *
515 */
516static void v9fs_destroy_inode_cache(void)
517{
518 kmem_cache_destroy(v9fs_inode_cache);
519}
520
521static int v9fs_cache_register(void)
522{
523 int ret;
524 ret = v9fs_init_inode_cache();
525 if (ret < 0)
526 return ret;
527#ifdef CONFIG_9P_FSCACHE
528 return fscache_register_netfs(&v9fs_cache_netfs);
529#else
530 return ret;
531#endif
532}
533
534static void v9fs_cache_unregister(void)
535{
536 v9fs_destroy_inode_cache();
537#ifdef CONFIG_9P_FSCACHE
538 fscache_unregister_netfs(&v9fs_cache_netfs);
539#endif
540}
541
470/** 542/**
471 * init_v9fs - Initialize module 543 * init_v9fs - Initialize module
472 * 544 *
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0..bd8496db135 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_H
24#define FS_9P_V9FS_H
25
23#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
24 27
25/** 28/**
@@ -28,8 +31,10 @@
28 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions 31 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
29 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy 32 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
30 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) 33 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
34 * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
31 * @V9FS_ACCESS_ANY: use a single attach for all users 35 * @V9FS_ACCESS_ANY: use a single attach for all users
32 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options 36 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
37 * @V9FS_POSIX_ACL: POSIX ACLs are enforced
33 * 38 *
34 * Session flags reflect options selected by users at mount time 39 * Session flags reflect options selected by users at mount time
35 */ 40 */
@@ -37,13 +42,15 @@
37 V9FS_ACCESS_USER | \ 42 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT) 43 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY 44#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
45#define V9FS_ACL_MASK V9FS_POSIX_ACL
40 46
41enum p9_session_flags { 47enum p9_session_flags {
42 V9FS_PROTO_2000U = 0x01, 48 V9FS_PROTO_2000U = 0x01,
43 V9FS_PROTO_2000L = 0x02, 49 V9FS_PROTO_2000L = 0x02,
44 V9FS_ACCESS_SINGLE = 0x04, 50 V9FS_ACCESS_SINGLE = 0x04,
45 V9FS_ACCESS_USER = 0x08, 51 V9FS_ACCESS_USER = 0x08,
46 V9FS_ACCESS_CLIENT = 0x10 52 V9FS_ACCESS_CLIENT = 0x10,
53 V9FS_POSIX_ACL = 0x20
47}; 54};
48 55
49/* possible values of ->cache */ 56/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
109 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
110 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
111 struct rw_semaphore rename_sem; 118 struct rw_semaphore rename_sem;
119 struct p9_fid *root_fid; /* Used for file system sync */
120};
121
122/* cache_validity flags */
123#define V9FS_INO_INVALID_ATTR 0x01
124
125struct v9fs_inode {
126#ifdef CONFIG_9P_FSCACHE
127 spinlock_t fscache_lock;
128 struct fscache_cookie *fscache;
129 struct p9_qid *fscache_key;
130#endif
131 unsigned int cache_validity;
132 struct p9_fid *writeback_fid;
133 struct inode vfs_inode;
112}; 134};
113 135
136static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
137{
138 return container_of(inode, struct v9fs_inode, vfs_inode);
139}
140
114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 141struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
115 char *); 142 char *);
116extern void v9fs_session_close(struct v9fs_session_info *v9ses); 143extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
124 struct inode *new_dir, struct dentry *new_dentry); 151 struct inode *new_dir, struct dentry *new_dentry);
125extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, 152extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
126 void *p); 153 void *p);
127extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses, 154extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
128 struct p9_fid *fid, 155 struct p9_fid *fid,
129 struct super_block *sb); 156 struct super_block *sb);
130
131extern const struct inode_operations v9fs_dir_inode_operations_dotl; 157extern const struct inode_operations v9fs_dir_inode_operations_dotl;
132extern const struct inode_operations v9fs_file_inode_operations_dotl; 158extern const struct inode_operations v9fs_file_inode_operations_dotl;
133extern const struct inode_operations v9fs_symlink_inode_operations_dotl; 159extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
134extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses, 160extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
135 struct p9_fid *fid, 161 struct p9_fid *fid,
136 struct super_block *sb); 162 struct super_block *sb);
137 163
138/* other default globals */ 164/* other default globals */
139#define V9FS_PORT 564 165#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
158} 184}
159 185
160/** 186/**
161 * v9fs_inode_from_fid - Helper routine to populate an inode by 187 * v9fs_get_inode_from_fid - Helper routine to populate an inode by
162 * issuing a attribute request 188 * issuing a attribute request
163 * @v9ses: session information 189 * @v9ses: session information
164 * @fid: fid to issue attribute request for 190 * @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
166 * 192 *
167 */ 193 */
168static inline struct inode * 194static inline struct inode *
169v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 195v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
170 struct super_block *sb) 196 struct super_block *sb)
171{ 197{
172 if (v9fs_proto_dotl(v9ses)) 198 if (v9fs_proto_dotl(v9ses))
173 return v9fs_inode_dotl(v9ses, fid, sb); 199 return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
174 else 200 else
175 return v9fs_inode(v9ses, fid, sb); 201 return v9fs_inode_from_fid(v9ses, fid, sb);
176} 202}
203#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597e..4014160903a 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_VFS_H
24#define FS_9P_V9FS_VFS_H
23 25
24/* plan9 semantics are that created files are implicitly opened. 26/* plan9 semantics are that created files are implicitly opened.
25 * But linux semantics are that you call create, then open. 27 * But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
36 * unlink calls remove, which is an implicit clunk. So we have to track 38 * unlink calls remove, which is an implicit clunk. So we have to track
37 * that kind of thing so that we don't try to clunk a dead fid. 39 * that kind of thing so that we don't try to clunk a dead fid.
38 */ 40 */
41#define P9_LOCK_TIMEOUT (30*HZ)
39 42
40extern struct file_system_type v9fs_fs_type; 43extern struct file_system_type v9fs_fs_type;
41extern const struct address_space_operations v9fs_addr_operations; 44extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
45extern const struct file_operations v9fs_dir_operations_dotl; 48extern const struct file_operations v9fs_dir_operations_dotl;
46extern const struct dentry_operations v9fs_dentry_operations; 49extern const struct dentry_operations v9fs_dentry_operations;
47extern const struct dentry_operations v9fs_cached_dentry_operations; 50extern const struct dentry_operations v9fs_cached_dentry_operations;
51extern const struct file_operations v9fs_cached_file_operations;
52extern const struct file_operations v9fs_cached_file_operations_dotl;
53extern struct kmem_cache *v9fs_inode_cache;
48 54
49#ifdef CONFIG_9P_FSCACHE
50struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
51void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
52#endif
53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 57struct inode *v9fs_get_inode(struct super_block *sb, int mode);
58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode);
55void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
62int v9fs_uflags2omode(int uflags, int extended); 67int v9fs_uflags2omode(int uflags, int extended);
63 68
64ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 69ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
65void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
66int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
67int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, int datasync);
68 74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
69#define P9_LOCK_TIMEOUT (30*HZ) 75 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
77int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
78static inline void v9fs_invalidate_inode_attr(struct inode *inode)
79{
80 struct v9fs_inode *v9inode;
81 v9inode = V9FS_I(inode);
82 v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
83 return;
84}
85#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863..2524e4cbb8e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "cache.h" 41#include "cache.h"
42#include "fid.h"
42 43
43/** 44/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 45 * v9fs_fid_readpage - read an entire page in from 9P
45 * 46 *
46 * @filp: file being read 47 * @fid: fid being read
47 * @page: structure to page 48 * @page: structure to page
48 * 49 *
49 */ 50 */
50 51static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 int retval; 53 int retval;
54 loff_t offset; 54 loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
67 buffer = kmap(page); 67 buffer = kmap(page);
68 offset = page_offset(page); 68 offset = page_offset(page);
69 69
70 retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); 70 retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
71 if (retval < 0) { 71 if (retval < 0) {
72 v9fs_uncache_page(inode, page); 72 v9fs_uncache_page(inode, page);
73 goto done; 73 goto done;
@@ -87,6 +87,19 @@ done:
87} 87}
88 88
89/** 89/**
90 * v9fs_vfs_readpage - read an entire page in from 9P
91 *
92 * @filp: file being read
93 * @page: structure to page
94 *
95 */
96
97static int v9fs_vfs_readpage(struct file *filp, struct page *page)
98{
99 return v9fs_fid_readpage(filp->private_data, page);
100}
101
102/**
90 * v9fs_vfs_readpages - read a set of pages from 9P 103 * v9fs_vfs_readpages - read a set of pages from 9P
91 * 104 *
92 * @filp: file being read 105 * @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
124{ 137{
125 if (PagePrivate(page)) 138 if (PagePrivate(page))
126 return 0; 139 return 0;
127
128 return v9fs_fscache_release_page(page, gfp); 140 return v9fs_fscache_release_page(page, gfp);
129} 141}
130 142
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
137 149
138static void v9fs_invalidate_page(struct page *page, unsigned long offset) 150static void v9fs_invalidate_page(struct page *page, unsigned long offset)
139{ 151{
152 /*
153 * If called with zero offset, we should release
154 * the private state assocated with the page
155 */
140 if (offset == 0) 156 if (offset == 0)
141 v9fs_fscache_invalidate_page(page); 157 v9fs_fscache_invalidate_page(page);
142} 158}
143 159
160static int v9fs_vfs_writepage_locked(struct page *page)
161{
162 char *buffer;
163 int retval, len;
164 loff_t offset, size;
165 mm_segment_t old_fs;
166 struct v9fs_inode *v9inode;
167 struct inode *inode = page->mapping->host;
168
169 v9inode = V9FS_I(inode);
170 size = i_size_read(inode);
171 if (page->index == size >> PAGE_CACHE_SHIFT)
172 len = size & ~PAGE_CACHE_MASK;
173 else
174 len = PAGE_CACHE_SIZE;
175
176 set_page_writeback(page);
177
178 buffer = kmap(page);
179 offset = page_offset(page);
180
181 old_fs = get_fs();
182 set_fs(get_ds());
183 /* We should have writeback_fid always set */
184 BUG_ON(!v9inode->writeback_fid);
185
186 retval = v9fs_file_write_internal(inode,
187 v9inode->writeback_fid,
188 (__force const char __user *)buffer,
189 len, &offset, 0);
190 if (retval > 0)
191 retval = 0;
192
193 set_fs(old_fs);
194 kunmap(page);
195 end_page_writeback(page);
196 return retval;
197}
198
199static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
200{
201 int retval;
202
203 retval = v9fs_vfs_writepage_locked(page);
204 if (retval < 0) {
205 if (retval == -EAGAIN) {
206 redirty_page_for_writepage(wbc, page);
207 retval = 0;
208 } else {
209 SetPageError(page);
210 mapping_set_error(page->mapping, retval);
211 }
212 } else
213 retval = 0;
214
215 unlock_page(page);
216 return retval;
217}
218
144/** 219/**
145 * v9fs_launder_page - Writeback a dirty page 220 * v9fs_launder_page - Writeback a dirty page
146 * Since the writes go directly to the server, we simply return a 0
147 * here to indicate success.
148 *
149 * Returns 0 on success. 221 * Returns 0 on success.
150 */ 222 */
151 223
152static int v9fs_launder_page(struct page *page) 224static int v9fs_launder_page(struct page *page)
153{ 225{
226 int retval;
227 struct inode *inode = page->mapping->host;
228
229 v9fs_fscache_wait_on_page_write(inode, page);
230 if (clear_page_dirty_for_io(page)) {
231 retval = v9fs_vfs_writepage_locked(page);
232 if (retval)
233 return retval;
234 }
154 return 0; 235 return 0;
155} 236}
156 237
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
173 * with an error. 254 * with an error.
174 * 255 *
175 */ 256 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 257static ssize_t
177 loff_t pos, unsigned long nr_segs) 258v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 loff_t pos, unsigned long nr_segs)
178{ 260{
261 /*
262 * FIXME
263 * Now that we do caching with cache mode enabled, We need
264 * to support direct IO
265 */
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " 266 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n", 267 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name, 268 iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
183 270
184 return -EINVAL; 271 return -EINVAL;
185} 272}
273
274static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
275 loff_t pos, unsigned len, unsigned flags,
276 struct page **pagep, void **fsdata)
277{
278 int retval = 0;
279 struct page *page;
280 struct v9fs_inode *v9inode;
281 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
282 struct inode *inode = mapping->host;
283
284 v9inode = V9FS_I(inode);
285start:
286 page = grab_cache_page_write_begin(mapping, index, flags);
287 if (!page) {
288 retval = -ENOMEM;
289 goto out;
290 }
291 BUG_ON(!v9inode->writeback_fid);
292 if (PageUptodate(page))
293 goto out;
294
295 if (len == PAGE_CACHE_SIZE)
296 goto out;
297
298 retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
299 page_cache_release(page);
300 if (!retval)
301 goto start;
302out:
303 *pagep = page;
304 return retval;
305}
306
307static int v9fs_write_end(struct file *filp, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 loff_t last_pos = pos + copied;
312 struct inode *inode = page->mapping->host;
313
314 if (unlikely(copied < len)) {
315 /*
316 * zero out the rest of the area
317 */
318 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
319
320 zero_user(page, from + copied, len - copied);
321 flush_dcache_page(page);
322 }
323
324 if (!PageUptodate(page))
325 SetPageUptodate(page);
326 /*
327 * No need to use i_size_read() here, the i_size
328 * cannot change under us because we hold the i_mutex.
329 */
330 if (last_pos > inode->i_size) {
331 inode_add_bytes(inode, last_pos - inode->i_size);
332 i_size_write(inode, last_pos);
333 }
334 set_page_dirty(page);
335 unlock_page(page);
336 page_cache_release(page);
337
338 return copied;
339}
340
341
186const struct address_space_operations v9fs_addr_operations = { 342const struct address_space_operations v9fs_addr_operations = {
187 .readpage = v9fs_vfs_readpage, 343 .readpage = v9fs_vfs_readpage,
188 .readpages = v9fs_vfs_readpages, 344 .readpages = v9fs_vfs_readpages,
189 .releasepage = v9fs_release_page, 345 .set_page_dirty = __set_page_dirty_nobuffers,
190 .invalidatepage = v9fs_invalidate_page, 346 .writepage = v9fs_vfs_writepage,
191 .launder_page = v9fs_launder_page, 347 .write_begin = v9fs_write_begin,
192 .direct_IO = v9fs_direct_IO, 348 .write_end = v9fs_write_end,
349 .releasepage = v9fs_release_page,
350 .invalidatepage = v9fs_invalidate_page,
351 .launder_page = v9fs_launder_page,
352 .direct_IO = v9fs_direct_IO,
193}; 353};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5..b6a3b9f7fe4 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0 63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0
64 * @dentry: dentry in question 64 * @dentry: dentry in question
65 * 65 *
66 * Only return 1 if our inode is invalid. Only non-synthetic files
67 * (ones without mtime == 0) should be calling this function.
68 *
69 */ 66 */
70
71static int v9fs_cached_dentry_delete(const struct dentry *dentry) 67static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 68{
73 struct inode *inode = dentry->d_inode; 69 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 70 dentry->d_name.name, dentry);
75 dentry);
76 71
77 if(!inode) 72 /* Don't cache negative dentries */
73 if (!dentry->d_inode)
78 return 1; 74 return 1;
79
80 return 0; 75 return 0;
81} 76}
82 77
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
105 } 100 }
106} 101}
107 102
103static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
104{
105 struct p9_fid *fid;
106 struct inode *inode;
107 struct v9fs_inode *v9inode;
108
109 if (nd->flags & LOOKUP_RCU)
110 return -ECHILD;
111
112 inode = dentry->d_inode;
113 if (!inode)
114 goto out_valid;
115
116 v9inode = V9FS_I(inode);
117 if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
118 int retval;
119 struct v9fs_session_info *v9ses;
120 fid = v9fs_fid_lookup(dentry);
121 if (IS_ERR(fid))
122 return PTR_ERR(fid);
123
124 v9ses = v9fs_inode2v9ses(inode);
125 if (v9fs_proto_dotl(v9ses))
126 retval = v9fs_refresh_inode_dotl(fid, inode);
127 else
128 retval = v9fs_refresh_inode(fid, inode);
129 if (retval <= 0)
130 return retval;
131 }
132out_valid:
133 return 1;
134}
135
108const struct dentry_operations v9fs_cached_dentry_operations = { 136const struct dentry_operations v9fs_cached_dentry_operations = {
137 .d_revalidate = v9fs_lookup_revalidate,
109 .d_delete = v9fs_cached_dentry_delete, 138 .d_delete = v9fs_cached_dentry_delete,
110 .d_release = v9fs_dentry_release, 139 .d_release = v9fs_dentry_release,
111}; 140};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefe..9c2bdda5cd9 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
295 P9_DPRINTK(P9_DEBUG_VFS, 295 P9_DPRINTK(P9_DEBUG_VFS,
296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n", 296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
297 inode, filp, fid ? fid->fid : -1); 297 inode, filp, fid ? fid->fid : -1);
298 filemap_write_and_wait(inode->i_mapping);
299 if (fid) 298 if (fid)
300 p9_client_clunk(fid); 299 p9_client_clunk(fid);
301 return 0; 300 return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c3067439..78bcb97c342 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
44#include "fid.h" 44#include "fid.h"
45#include "cache.h" 45#include "cache.h"
46 46
47static const struct file_operations v9fs_cached_file_operations; 47static const struct vm_operations_struct v9fs_file_vm_ops;
48static const struct file_operations v9fs_cached_file_operations_dotl;
49 48
50/** 49/**
51 * v9fs_file_open - open a file (or directory) 50 * v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
57int v9fs_file_open(struct inode *inode, struct file *file) 56int v9fs_file_open(struct inode *inode, struct file *file)
58{ 57{
59 int err; 58 int err;
59 struct v9fs_inode *v9inode;
60 struct v9fs_session_info *v9ses; 60 struct v9fs_session_info *v9ses;
61 struct p9_fid *fid; 61 struct p9_fid *fid;
62 int omode; 62 int omode;
63 63
64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); 64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
65 v9inode = V9FS_I(inode);
65 v9ses = v9fs_inode2v9ses(inode); 66 v9ses = v9fs_inode2v9ses(inode);
66 if (v9fs_proto_dotl(v9ses)) 67 if (v9fs_proto_dotl(v9ses))
67 omode = file->f_flags; 68 omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
89 } 90 }
90 91
91 file->private_data = fid; 92 file->private_data = fid;
92 if ((fid->qid.version) && (v9ses->cache)) { 93 if (v9ses->cache && !v9inode->writeback_fid) {
93 P9_DPRINTK(P9_DEBUG_VFS, "cached"); 94 /*
94 /* enable cached file options */ 95 * clone a fid and add it to writeback_fid
95 if(file->f_op == &v9fs_file_operations) 96 * we do it during open time instead of
96 file->f_op = &v9fs_cached_file_operations; 97 * page dirty time via write_begin/page_mkwrite
97 else if (file->f_op == &v9fs_file_operations_dotl) 98 * because we want write after unlink usecase
98 file->f_op = &v9fs_cached_file_operations_dotl; 99 * to work.
99 100 */
101 fid = v9fs_writeback_fid(file->f_path.dentry);
102 if (IS_ERR(fid)) {
103 err = PTR_ERR(fid);
104 goto out_error;
105 }
106 v9inode->writeback_fid = (void *) fid;
107 }
100#ifdef CONFIG_9P_FSCACHE 108#ifdef CONFIG_9P_FSCACHE
109 if (v9ses->cache)
101 v9fs_cache_inode_set_cookie(inode, file); 110 v9fs_cache_inode_set_cookie(inode, file);
102#endif 111#endif
103 }
104
105 return 0; 112 return 0;
113out_error:
114 p9_client_clunk(file->private_data);
115 file->private_data = NULL;
116 return err;
106} 117}
107 118
108/** 119/**
@@ -335,25 +346,22 @@ out_err:
335} 346}
336 347
337/** 348/**
338 * v9fs_file_readn - read from a file 349 * v9fs_fid_readn - read from a fid
339 * @filp: file pointer to read 350 * @fid: fid to read
340 * @data: data buffer to read data into 351 * @data: data buffer to read data into
341 * @udata: user data buffer to read data into 352 * @udata: user data buffer to read data into
342 * @count: size of buffer 353 * @count: size of buffer
343 * @offset: offset at which to read data 354 * @offset: offset at which to read data
344 * 355 *
345 */ 356 */
346
347ssize_t 357ssize_t
348v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, 358v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
349 u64 offset) 359 u64 offset)
350{ 360{
351 int n, total, size; 361 int n, total, size;
352 struct p9_fid *fid = filp->private_data;
353 362
354 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 363 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
355 (long long unsigned) offset, count); 364 (long long unsigned) offset, count);
356
357 n = 0; 365 n = 0;
358 total = 0; 366 total = 0;
359 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 367 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
379} 387}
380 388
381/** 389/**
390 * v9fs_file_readn - read from a file
391 * @filp: file pointer to read
392 * @data: data buffer to read data into
393 * @udata: user data buffer to read data into
394 * @count: size of buffer
395 * @offset: offset at which to read data
396 *
397 */
398ssize_t
399v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
400 u64 offset)
401{
402 return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
403}
404
405/**
382 * v9fs_file_read - read from a file 406 * v9fs_file_read - read from a file
383 * @filp: file pointer to read 407 * @filp: file pointer to read
384 * @udata: user data buffer to read data into 408 * @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
410 return ret; 434 return ret;
411} 435}
412 436
413/** 437ssize_t
414 * v9fs_file_write - write to a file 438v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
415 * @filp: file pointer to write 439 const char __user *data, size_t count,
416 * @data: data buffer to write data from 440 loff_t *offset, int invalidate)
417 * @count: size of buffer
418 * @offset: offset at which to write data
419 *
420 */
421
422static ssize_t
423v9fs_file_write(struct file *filp, const char __user * data,
424 size_t count, loff_t * offset)
425{ 441{
426 ssize_t retval;
427 size_t total = 0;
428 int n; 442 int n;
429 struct p9_fid *fid; 443 loff_t i_size;
444 size_t total = 0;
430 struct p9_client *clnt; 445 struct p9_client *clnt;
431 struct inode *inode = filp->f_path.dentry->d_inode;
432 loff_t origin = *offset; 446 loff_t origin = *offset;
433 unsigned long pg_start, pg_end; 447 unsigned long pg_start, pg_end;
434 448
435 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 449 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
436 (int)count, (int)*offset); 450 (int)count, (int)*offset);
437 451
438 fid = filp->private_data;
439 clnt = fid->clnt; 452 clnt = fid->clnt;
440
441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
444
445 retval = -EINVAL;
446 if ((ssize_t) count < 0)
447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
451
452 do { 453 do {
453 n = p9_client_write(fid, NULL, data+total, origin+total, count); 454 n = p9_client_write(fid, NULL, data+total, origin+total, count);
454 if (n <= 0) 455 if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
457 total += n; 458 total += n;
458 } while (count > 0); 459 } while (count > 0);
459 460
460 if (total > 0) { 461 if (invalidate && (total > 0)) {
461 pg_start = origin >> PAGE_CACHE_SHIFT; 462 pg_start = origin >> PAGE_CACHE_SHIFT;
462 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; 463 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
463 if (inode->i_mapping && inode->i_mapping->nrpages) 464 if (inode->i_mapping && inode->i_mapping->nrpages)
464 invalidate_inode_pages2_range(inode->i_mapping, 465 invalidate_inode_pages2_range(inode->i_mapping,
465 pg_start, pg_end); 466 pg_start, pg_end);
466 *offset += total; 467 *offset += total;
467 i_size_write(inode, i_size_read(inode) + total); 468 i_size = i_size_read(inode);
468 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 469 if (*offset > i_size) {
470 inode_add_bytes(inode, *offset - i_size);
471 i_size_write(inode, *offset);
472 }
469 } 473 }
470
471 if (n < 0) 474 if (n < 0)
472 retval = n; 475 return n;
473 else 476
474 retval = total; 477 return total;
478}
479
480/**
481 * v9fs_file_write - write to a file
482 * @filp: file pointer to write
483 * @data: data buffer to write data from
484 * @count: size of buffer
485 * @offset: offset at which to write data
486 *
487 */
488static ssize_t
489v9fs_file_write(struct file *filp, const char __user * data,
490 size_t count, loff_t *offset)
491{
492 ssize_t retval = 0;
493 loff_t origin = *offset;
494
495
496 retval = generic_write_checks(filp, &origin, &count, 0);
497 if (retval)
498 goto out;
499
500 retval = -EINVAL;
501 if ((ssize_t) count < 0)
502 goto out;
503 retval = 0;
504 if (!count)
505 goto out;
506
507 return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
508 filp->private_data,
509 data, count, offset, 1);
475out: 510out:
476 return retval; 511 return retval;
477} 512}
478 513
514
479static int v9fs_file_fsync(struct file *filp, int datasync) 515static int v9fs_file_fsync(struct file *filp, int datasync)
480{ 516{
481 struct p9_fid *fid; 517 struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
505 return retval; 541 return retval;
506} 542}
507 543
508static const struct file_operations v9fs_cached_file_operations = { 544static int
545v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
546{
547 int retval;
548
549 retval = generic_file_mmap(file, vma);
550 if (!retval)
551 vma->vm_ops = &v9fs_file_vm_ops;
552
553 return retval;
554}
555
556static int
557v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
558{
559 struct v9fs_inode *v9inode;
560 struct page *page = vmf->page;
561 struct file *filp = vma->vm_file;
562 struct inode *inode = filp->f_path.dentry->d_inode;
563
564
565 P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
566 page, (unsigned long)filp->private_data);
567
568 v9inode = V9FS_I(inode);
569 /* make sure the cache has finished storing the page */
570 v9fs_fscache_wait_on_page_write(inode, page);
571 BUG_ON(!v9inode->writeback_fid);
572 lock_page(page);
573 if (page->mapping != inode->i_mapping)
574 goto out_unlock;
575
576 return VM_FAULT_LOCKED;
577out_unlock:
578 unlock_page(page);
579 return VM_FAULT_NOPAGE;
580}
581
582static ssize_t
583v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
584 loff_t *offsetp)
585{
586 loff_t size, offset;
587 struct inode *inode;
588 struct address_space *mapping;
589
590 offset = *offsetp;
591 mapping = filp->f_mapping;
592 inode = mapping->host;
593 if (!count)
594 return 0;
595 size = i_size_read(inode);
596 if (offset < size)
597 filemap_write_and_wait_range(mapping, offset,
598 offset + count - 1);
599
600 return v9fs_file_read(filp, udata, count, offsetp);
601}
602
603/**
604 * v9fs_cached_file_read - read from a file
605 * @filp: file pointer to read
606 * @udata: user data buffer to read data into
607 * @count: size of buffer
608 * @offset: offset at which to read data
609 *
610 */
611static ssize_t
612v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
613 loff_t *offset)
614{
615 if (filp->f_flags & O_DIRECT)
616 return v9fs_direct_read(filp, data, count, offset);
617 return do_sync_read(filp, data, count, offset);
618}
619
620static ssize_t
621v9fs_direct_write(struct file *filp, const char __user * data,
622 size_t count, loff_t *offsetp)
623{
624 loff_t offset;
625 ssize_t retval;
626 struct inode *inode;
627 struct address_space *mapping;
628
629 offset = *offsetp;
630 mapping = filp->f_mapping;
631 inode = mapping->host;
632 if (!count)
633 return 0;
634
635 mutex_lock(&inode->i_mutex);
636 retval = filemap_write_and_wait_range(mapping, offset,
637 offset + count - 1);
638 if (retval)
639 goto err_out;
640 /*
641 * After a write we want buffered reads to be sure to go to disk to get
642 * the new data. We invalidate clean cached page from the region we're
643 * about to write. We do this *before* the write so that if we fail
644 * here we fall back to buffered write
645 */
646 if (mapping->nrpages) {
647 pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
648 pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
649
650 retval = invalidate_inode_pages2_range(mapping,
651 pg_start, pg_end);
652 /*
653 * If a page can not be invalidated, fall back
654 * to buffered write.
655 */
656 if (retval) {
657 if (retval == -EBUSY)
658 goto buff_write;
659 goto err_out;
660 }
661 }
662 retval = v9fs_file_write(filp, data, count, offsetp);
663err_out:
664 mutex_unlock(&inode->i_mutex);
665 return retval;
666
667buff_write:
668 mutex_unlock(&inode->i_mutex);
669 return do_sync_write(filp, data, count, offsetp);
670}
671
672/**
673 * v9fs_cached_file_write - write to a file
674 * @filp: file pointer to write
675 * @data: data buffer to write data from
676 * @count: size of buffer
677 * @offset: offset at which to write data
678 *
679 */
680static ssize_t
681v9fs_cached_file_write(struct file *filp, const char __user * data,
682 size_t count, loff_t *offset)
683{
684
685 if (filp->f_flags & O_DIRECT)
686 return v9fs_direct_write(filp, data, count, offset);
687 return do_sync_write(filp, data, count, offset);
688}
689
690static const struct vm_operations_struct v9fs_file_vm_ops = {
691 .fault = filemap_fault,
692 .page_mkwrite = v9fs_vm_page_mkwrite,
693};
694
695
696const struct file_operations v9fs_cached_file_operations = {
509 .llseek = generic_file_llseek, 697 .llseek = generic_file_llseek,
510 .read = do_sync_read, 698 .read = v9fs_cached_file_read,
699 .write = v9fs_cached_file_write,
511 .aio_read = generic_file_aio_read, 700 .aio_read = generic_file_aio_read,
512 .write = v9fs_file_write, 701 .aio_write = generic_file_aio_write,
513 .open = v9fs_file_open, 702 .open = v9fs_file_open,
514 .release = v9fs_dir_release, 703 .release = v9fs_dir_release,
515 .lock = v9fs_file_lock, 704 .lock = v9fs_file_lock,
516 .mmap = generic_file_readonly_mmap, 705 .mmap = v9fs_file_mmap,
517 .fsync = v9fs_file_fsync, 706 .fsync = v9fs_file_fsync,
518}; 707};
519 708
520static const struct file_operations v9fs_cached_file_operations_dotl = { 709const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek, 710 .llseek = generic_file_llseek,
522 .read = do_sync_read, 711 .read = v9fs_cached_file_read,
712 .write = v9fs_cached_file_write,
523 .aio_read = generic_file_aio_read, 713 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write, 714 .aio_write = generic_file_aio_write,
525 .open = v9fs_file_open, 715 .open = v9fs_file_open,
526 .release = v9fs_dir_release, 716 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl, 717 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl, 718 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap, 719 .mmap = v9fs_file_mmap,
530 .fsync = v9fs_file_fsync_dotl, 720 .fsync = v9fs_file_fsync_dotl,
531}; 721};
532 722
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c..8a2c232f708 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
203 wstat->extension = NULL; 203 wstat->extension = NULL;
204} 204}
205 205
206#ifdef CONFIG_9P_FSCACHE
207/** 206/**
208 * v9fs_alloc_inode - helper function to allocate an inode 207 * v9fs_alloc_inode - helper function to allocate an inode
209 * This callback is executed before setting up the inode so that we
210 * can associate a vcookie with each inode.
211 * 208 *
212 */ 209 */
213
214struct inode *v9fs_alloc_inode(struct super_block *sb) 210struct inode *v9fs_alloc_inode(struct super_block *sb)
215{ 211{
216 struct v9fs_cookie *vcookie; 212 struct v9fs_inode *v9inode;
217 vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, 213 v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
218 GFP_KERNEL); 214 GFP_KERNEL);
219 if (!vcookie) 215 if (!v9inode)
220 return NULL; 216 return NULL;
221 217#ifdef CONFIG_9P_FSCACHE
222 vcookie->fscache = NULL; 218 v9inode->fscache = NULL;
223 vcookie->qid = NULL; 219 v9inode->fscache_key = NULL;
224 spin_lock_init(&vcookie->lock); 220 spin_lock_init(&v9inode->fscache_lock);
225 return &vcookie->inode; 221#endif
222 v9inode->writeback_fid = NULL;
223 v9inode->cache_validity = 0;
224 return &v9inode->vfs_inode;
226} 225}
227 226
228/** 227/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
234{ 233{
235 struct inode *inode = container_of(head, struct inode, i_rcu); 234 struct inode *inode = container_of(head, struct inode, i_rcu);
236 INIT_LIST_HEAD(&inode->i_dentry); 235 INIT_LIST_HEAD(&inode->i_dentry);
237 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 236 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
238} 237}
239 238
240void v9fs_destroy_inode(struct inode *inode) 239void v9fs_destroy_inode(struct inode *inode)
241{ 240{
242 call_rcu(&inode->i_rcu, v9fs_i_callback); 241 call_rcu(&inode->i_rcu, v9fs_i_callback);
243} 242}
244#endif
245 243
246/** 244int v9fs_init_inode(struct v9fs_session_info *v9ses,
247 * v9fs_get_inode - helper function to setup an inode 245 struct inode *inode, int mode)
248 * @sb: superblock
249 * @mode: mode to setup inode with
250 *
251 */
252
253struct inode *v9fs_get_inode(struct super_block *sb, int mode)
254{ 246{
255 int err; 247 int err = 0;
256 struct inode *inode;
257 struct v9fs_session_info *v9ses = sb->s_fs_info;
258
259 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
260
261 inode = new_inode(sb);
262 if (!inode) {
263 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
264 return ERR_PTR(-ENOMEM);
265 }
266 248
267 inode_init_owner(inode, NULL, mode); 249 inode_init_owner(inode, NULL, mode);
268 inode->i_blocks = 0; 250 inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
292 case S_IFREG: 274 case S_IFREG:
293 if (v9fs_proto_dotl(v9ses)) { 275 if (v9fs_proto_dotl(v9ses)) {
294 inode->i_op = &v9fs_file_inode_operations_dotl; 276 inode->i_op = &v9fs_file_inode_operations_dotl;
295 inode->i_fop = &v9fs_file_operations_dotl; 277 if (v9ses->cache)
278 inode->i_fop =
279 &v9fs_cached_file_operations_dotl;
280 else
281 inode->i_fop = &v9fs_file_operations_dotl;
296 } else { 282 } else {
297 inode->i_op = &v9fs_file_inode_operations; 283 inode->i_op = &v9fs_file_inode_operations;
298 inode->i_fop = &v9fs_file_operations; 284 if (v9ses->cache)
285 inode->i_fop = &v9fs_cached_file_operations;
286 else
287 inode->i_fop = &v9fs_file_operations;
299 } 288 }
300 289
301 break; 290 break;
302
303 case S_IFLNK: 291 case S_IFLNK:
304 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { 292 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
305 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " 293 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
335 err = -EINVAL; 323 err = -EINVAL;
336 goto error; 324 goto error;
337 } 325 }
326error:
327 return err;
338 328
339 return inode; 329}
340 330
341error: 331/**
342 iput(inode); 332 * v9fs_get_inode - helper function to setup an inode
343 return ERR_PTR(err); 333 * @sb: superblock
334 * @mode: mode to setup inode with
335 *
336 */
337
338struct inode *v9fs_get_inode(struct super_block *sb, int mode)
339{
340 int err;
341 struct inode *inode;
342 struct v9fs_session_info *v9ses = sb->s_fs_info;
343
344 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
345
346 inode = new_inode(sb);
347 if (!inode) {
348 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
349 return ERR_PTR(-ENOMEM);
350 }
351 err = v9fs_init_inode(v9ses, inode, mode);
352 if (err) {
353 iput(inode);
354 return ERR_PTR(err);
355 }
356 return inode;
344} 357}
345 358
346/* 359/*
@@ -403,6 +416,8 @@ error:
403 */ 416 */
404void v9fs_evict_inode(struct inode *inode) 417void v9fs_evict_inode(struct inode *inode)
405{ 418{
419 struct v9fs_inode *v9inode = V9FS_I(inode);
420
406 truncate_inode_pages(inode->i_mapping, 0); 421 truncate_inode_pages(inode->i_mapping, 0);
407 end_writeback(inode); 422 end_writeback(inode);
408 filemap_fdatawrite(inode->i_mapping); 423 filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
410#ifdef CONFIG_9P_FSCACHE 425#ifdef CONFIG_9P_FSCACHE
411 v9fs_cache_inode_put_cookie(inode); 426 v9fs_cache_inode_put_cookie(inode);
412#endif 427#endif
428 /* clunk the fid stashed in writeback_fid */
429 if (v9inode->writeback_fid) {
430 p9_client_clunk(v9inode->writeback_fid);
431 v9inode->writeback_fid = NULL;
432 }
413} 433}
414 434
415struct inode * 435static struct inode *v9fs_qid_iget(struct super_block *sb,
416v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, 436 struct p9_qid *qid,
417 struct super_block *sb) 437 struct p9_wstat *st)
418{ 438{
419 int err, umode; 439 int retval, umode;
420 struct inode *ret = NULL; 440 unsigned long i_ino;
421 struct p9_wstat *st; 441 struct inode *inode;
422 442 struct v9fs_session_info *v9ses = sb->s_fs_info;
423 st = p9_client_stat(fid);
424 if (IS_ERR(st))
425 return ERR_CAST(st);
426 443
444 i_ino = v9fs_qid2ino(qid);
445 inode = iget_locked(sb, i_ino);
446 if (!inode)
447 return ERR_PTR(-ENOMEM);
448 if (!(inode->i_state & I_NEW))
449 return inode;
450 /*
451 * initialize the inode with the stat info
452 * FIXME!! we may need support for stale inodes
453 * later.
454 */
427 umode = p9mode2unixmode(v9ses, st->mode); 455 umode = p9mode2unixmode(v9ses, st->mode);
428 ret = v9fs_get_inode(sb, umode); 456 retval = v9fs_init_inode(v9ses, inode, umode);
429 if (IS_ERR(ret)) { 457 if (retval)
430 err = PTR_ERR(ret);
431 goto error; 458 goto error;
432 }
433
434 v9fs_stat2inode(st, ret, sb);
435 ret->i_ino = v9fs_qid2ino(&st->qid);
436 459
460 v9fs_stat2inode(st, inode, sb);
437#ifdef CONFIG_9P_FSCACHE 461#ifdef CONFIG_9P_FSCACHE
438 v9fs_vcookie_set_qid(ret, &st->qid); 462 v9fs_fscache_set_key(inode, &st->qid);
439 v9fs_cache_inode_get_cookie(ret); 463 v9fs_cache_inode_get_cookie(inode);
440#endif 464#endif
441 p9stat_free(st); 465 unlock_new_inode(inode);
442 kfree(st); 466 return inode;
443 return ret;
444error: 467error:
468 unlock_new_inode(inode);
469 iput(inode);
470 return ERR_PTR(retval);
471
472}
473
474struct inode *
475v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
476 struct super_block *sb)
477{
478 struct p9_wstat *st;
479 struct inode *inode = NULL;
480
481 st = p9_client_stat(fid);
482 if (IS_ERR(st))
483 return ERR_CAST(st);
484
485 inode = v9fs_qid_iget(sb, &st->qid, st);
445 p9stat_free(st); 486 p9stat_free(st);
446 kfree(st); 487 kfree(st);
447 return ERR_PTR(err); 488 return inode;
448} 489}
449 490
450/** 491/**
@@ -458,8 +499,8 @@ error:
458static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 499static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
459{ 500{
460 int retval; 501 int retval;
461 struct inode *file_inode;
462 struct p9_fid *v9fid; 502 struct p9_fid *v9fid;
503 struct inode *file_inode;
463 504
464 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 505 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
465 rmdir); 506 rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
470 return PTR_ERR(v9fid); 511 return PTR_ERR(v9fid);
471 512
472 retval = p9_client_remove(v9fid); 513 retval = p9_client_remove(v9fid);
473 if (!retval) 514 if (!retval) {
474 drop_nlink(file_inode); 515 /*
516 * directories on unlink should have zero
517 * link count
518 */
519 if (rmdir) {
520 clear_nlink(file_inode);
521 drop_nlink(dir);
522 } else
523 drop_nlink(file_inode);
524
525 v9fs_invalidate_inode_attr(file_inode);
526 v9fs_invalidate_inode_attr(dir);
527 }
475 return retval; 528 return retval;
476} 529}
477 530
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
531 } 584 }
532 585
533 /* instantiate inode and assign the unopened fid to the dentry */ 586 /* instantiate inode and assign the unopened fid to the dentry */
534 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 587 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
535 if (IS_ERR(inode)) { 588 if (IS_ERR(inode)) {
536 err = PTR_ERR(inode); 589 err = PTR_ERR(inode);
537 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 590 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
570 int err; 623 int err;
571 u32 perm; 624 u32 perm;
572 int flags; 625 int flags;
573 struct v9fs_session_info *v9ses;
574 struct p9_fid *fid;
575 struct file *filp; 626 struct file *filp;
627 struct v9fs_inode *v9inode;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid, *inode_fid;
576 630
577 err = 0; 631 err = 0;
578 fid = NULL; 632 fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
592 goto error; 646 goto error;
593 } 647 }
594 648
649 v9fs_invalidate_inode_attr(dir);
595 /* if we are opening a file, assign the open fid to the file */ 650 /* if we are opening a file, assign the open fid to the file */
596 if (nd && nd->flags & LOOKUP_OPEN) { 651 if (nd && nd->flags & LOOKUP_OPEN) {
652 v9inode = V9FS_I(dentry->d_inode);
653 if (v9ses->cache && !v9inode->writeback_fid) {
654 /*
655 * clone a fid and add it to writeback_fid
656 * we do it during open time instead of
657 * page dirty time via write_begin/page_mkwrite
658 * because we want write after unlink usecase
659 * to work.
660 */
661 inode_fid = v9fs_writeback_fid(dentry);
662 if (IS_ERR(inode_fid)) {
663 err = PTR_ERR(inode_fid);
664 goto error;
665 }
666 v9inode->writeback_fid = (void *) inode_fid;
667 }
597 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 668 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
598 if (IS_ERR(filp)) { 669 if (IS_ERR(filp)) {
599 err = PTR_ERR(filp); 670 err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
601 } 672 }
602 673
603 filp->private_data = fid; 674 filp->private_data = fid;
675#ifdef CONFIG_9P_FSCACHE
676 if (v9ses->cache)
677 v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
678#endif
604 } else 679 } else
605 p9_client_clunk(fid); 680 p9_client_clunk(fid);
606 681
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
625{ 700{
626 int err; 701 int err;
627 u32 perm; 702 u32 perm;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid; 703 struct p9_fid *fid;
704 struct v9fs_session_info *v9ses;
630 705
631 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 706 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
632 err = 0; 707 err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
636 if (IS_ERR(fid)) { 711 if (IS_ERR(fid)) {
637 err = PTR_ERR(fid); 712 err = PTR_ERR(fid);
638 fid = NULL; 713 fid = NULL;
714 } else {
715 inc_nlink(dir);
716 v9fs_invalidate_inode_attr(dir);
639 } 717 }
640 718
641 if (fid) 719 if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
687 return ERR_PTR(result); 765 return ERR_PTR(result);
688 } 766 }
689 767
690 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 768 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
691 if (IS_ERR(inode)) { 769 if (IS_ERR(inode)) {
692 result = PTR_ERR(inode); 770 result = PTR_ERR(inode);
693 inode = NULL; 771 inode = NULL;
@@ -747,17 +825,19 @@ int
747v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 825v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
748 struct inode *new_dir, struct dentry *new_dentry) 826 struct inode *new_dir, struct dentry *new_dentry)
749{ 827{
828 int retval;
750 struct inode *old_inode; 829 struct inode *old_inode;
830 struct inode *new_inode;
751 struct v9fs_session_info *v9ses; 831 struct v9fs_session_info *v9ses;
752 struct p9_fid *oldfid; 832 struct p9_fid *oldfid;
753 struct p9_fid *olddirfid; 833 struct p9_fid *olddirfid;
754 struct p9_fid *newdirfid; 834 struct p9_fid *newdirfid;
755 struct p9_wstat wstat; 835 struct p9_wstat wstat;
756 int retval;
757 836
758 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 837 P9_DPRINTK(P9_DEBUG_VFS, "\n");
759 retval = 0; 838 retval = 0;
760 old_inode = old_dentry->d_inode; 839 old_inode = old_dentry->d_inode;
840 new_inode = new_dentry->d_inode;
761 v9ses = v9fs_inode2v9ses(old_inode); 841 v9ses = v9fs_inode2v9ses(old_inode);
762 oldfid = v9fs_fid_lookup(old_dentry); 842 oldfid = v9fs_fid_lookup(old_dentry);
763 if (IS_ERR(oldfid)) 843 if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
798 retval = p9_client_wstat(oldfid, &wstat); 878 retval = p9_client_wstat(oldfid, &wstat);
799 879
800clunk_newdir: 880clunk_newdir:
801 if (!retval) 881 if (!retval) {
882 if (new_inode) {
883 if (S_ISDIR(new_inode->i_mode))
884 clear_nlink(new_inode);
885 else
886 drop_nlink(new_inode);
887 /*
888 * Work around vfs rename rehash bug with
889 * FS_RENAME_DOES_D_MOVE
890 */
891 v9fs_invalidate_inode_attr(new_inode);
892 }
893 if (S_ISDIR(old_inode->i_mode)) {
894 if (!new_inode)
895 inc_nlink(new_dir);
896 drop_nlink(old_dir);
897 }
898 v9fs_invalidate_inode_attr(old_inode);
899 v9fs_invalidate_inode_attr(old_dir);
900 v9fs_invalidate_inode_attr(new_dir);
901
802 /* successful rename */ 902 /* successful rename */
803 d_move(old_dentry, new_dentry); 903 d_move(old_dentry, new_dentry);
904 }
804 up_write(&v9ses->rename_sem); 905 up_write(&v9ses->rename_sem);
805 p9_client_clunk(newdirfid); 906 p9_client_clunk(newdirfid);
806 907
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
831 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 932 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
832 err = -EPERM; 933 err = -EPERM;
833 v9ses = v9fs_inode2v9ses(dentry->d_inode); 934 v9ses = v9fs_inode2v9ses(dentry->d_inode);
834 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 935 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
835 return simple_getattr(mnt, dentry, stat); 936 generic_fillattr(dentry->d_inode, stat);
836 937 return 0;
938 }
837 fid = v9fs_fid_lookup(dentry); 939 fid = v9fs_fid_lookup(dentry);
838 if (IS_ERR(fid)) 940 if (IS_ERR(fid))
839 return PTR_ERR(fid); 941 return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
891 if (iattr->ia_valid & ATTR_GID) 993 if (iattr->ia_valid & ATTR_GID)
892 wstat.n_gid = iattr->ia_gid; 994 wstat.n_gid = iattr->ia_gid;
893 } 995 }
894
895 retval = p9_client_wstat(fid, &wstat);
896 if (retval < 0)
897 return retval;
898
899 if ((iattr->ia_valid & ATTR_SIZE) && 996 if ((iattr->ia_valid & ATTR_SIZE) &&
900 iattr->ia_size != i_size_read(dentry->d_inode)) { 997 iattr->ia_size != i_size_read(dentry->d_inode)) {
901 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 998 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
902 if (retval) 999 if (retval)
903 return retval; 1000 return retval;
904 } 1001 }
1002 /* Write all dirty data */
1003 if (S_ISREG(dentry->d_inode->i_mode))
1004 filemap_write_and_wait(dentry->d_inode->i_mapping);
1005
1006 retval = p9_client_wstat(fid, &wstat);
1007 if (retval < 0)
1008 return retval;
1009 v9fs_invalidate_inode_attr(dentry->d_inode);
905 1010
906 setattr_copy(dentry->d_inode, iattr); 1011 setattr_copy(dentry->d_inode, iattr);
907 mark_inode_dirty(dentry->d_inode); 1012 mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
924 char tag_name[14]; 1029 char tag_name[14];
925 unsigned int i_nlink; 1030 unsigned int i_nlink;
926 struct v9fs_session_info *v9ses = sb->s_fs_info; 1031 struct v9fs_session_info *v9ses = sb->s_fs_info;
1032 struct v9fs_inode *v9inode = V9FS_I(inode);
927 1033
928 inode->i_nlink = 1; 1034 inode->i_nlink = 1;
929 1035
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
983 1089
984 /* not real number of blocks, but 512 byte ones ... */ 1090 /* not real number of blocks, but 512 byte ones ... */
985 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 1091 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
1092 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
986} 1093}
987 1094
988/** 1095/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1115 int mode, const char *extension) 1222 int mode, const char *extension)
1116{ 1223{
1117 u32 perm; 1224 u32 perm;
1118 struct v9fs_session_info *v9ses;
1119 struct p9_fid *fid; 1225 struct p9_fid *fid;
1226 struct v9fs_session_info *v9ses;
1120 1227
1121 v9ses = v9fs_inode2v9ses(dir); 1228 v9ses = v9fs_inode2v9ses(dir);
1122 if (!v9fs_proto_dotu(v9ses)) { 1229 if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1130 if (IS_ERR(fid)) 1237 if (IS_ERR(fid))
1131 return PTR_ERR(fid); 1238 return PTR_ERR(fid);
1132 1239
1240 v9fs_invalidate_inode_attr(dir);
1133 p9_client_clunk(fid); 1241 p9_client_clunk(fid);
1134 return 0; 1242 return 0;
1135} 1243}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1166 struct dentry *dentry) 1274 struct dentry *dentry)
1167{ 1275{
1168 int retval; 1276 int retval;
1169 struct p9_fid *oldfid;
1170 char *name; 1277 char *name;
1278 struct p9_fid *oldfid;
1171 1279
1172 P9_DPRINTK(P9_DEBUG_VFS, 1280 P9_DPRINTK(P9_DEBUG_VFS,
1173 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1281 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1186 sprintf(name, "%d\n", oldfid->fid); 1294 sprintf(name, "%d\n", oldfid->fid);
1187 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); 1295 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1188 __putname(name); 1296 __putname(name);
1189 1297 if (!retval) {
1298 v9fs_refresh_inode(oldfid, old_dentry->d_inode);
1299 v9fs_invalidate_inode_attr(dir);
1300 }
1190clunk_fid: 1301clunk_fid:
1191 p9_client_clunk(oldfid); 1302 p9_client_clunk(oldfid);
1192 return retval; 1303 return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1237 return retval; 1348 return retval;
1238} 1349}
1239 1350
1351int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
1352{
1353 loff_t i_size;
1354 struct p9_wstat *st;
1355 struct v9fs_session_info *v9ses;
1356
1357 v9ses = v9fs_inode2v9ses(inode);
1358 st = p9_client_stat(fid);
1359 if (IS_ERR(st))
1360 return PTR_ERR(st);
1361
1362 spin_lock(&inode->i_lock);
1363 /*
1364 * We don't want to refresh inode->i_size,
1365 * because we may have cached data
1366 */
1367 i_size = inode->i_size;
1368 v9fs_stat2inode(st, inode, inode->i_sb);
1369 if (v9ses->cache)
1370 inode->i_size = i_size;
1371 spin_unlock(&inode->i_lock);
1372 p9stat_free(st);
1373 kfree(st);
1374 return 0;
1375}
1376
1240static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1377static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1241 .create = v9fs_vfs_create, 1378 .create = v9fs_vfs_create,
1242 .lookup = v9fs_vfs_lookup, 1379 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace..67c138e94fe 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
86 return dentry; 86 return dentry;
87} 87}
88 88
89static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
90 struct p9_qid *qid,
91 struct p9_fid *fid,
92 struct p9_stat_dotl *st)
93{
94 int retval;
95 unsigned long i_ino;
96 struct inode *inode;
97 struct v9fs_session_info *v9ses = sb->s_fs_info;
98
99 i_ino = v9fs_qid2ino(qid);
100 inode = iget_locked(sb, i_ino);
101 if (!inode)
102 return ERR_PTR(-ENOMEM);
103 if (!(inode->i_state & I_NEW))
104 return inode;
105 /*
106 * initialize the inode with the stat info
107 * FIXME!! we may need support for stale inodes
108 * later.
109 */
110 retval = v9fs_init_inode(v9ses, inode, st->st_mode);
111 if (retval)
112 goto error;
113
114 v9fs_stat2inode_dotl(st, inode);
115#ifdef CONFIG_9P_FSCACHE
116 v9fs_fscache_set_key(inode, &st->qid);
117 v9fs_cache_inode_get_cookie(inode);
118#endif
119 retval = v9fs_get_acl(inode, fid);
120 if (retval)
121 goto error;
122
123 unlock_new_inode(inode);
124 return inode;
125error:
126 unlock_new_inode(inode);
127 iput(inode);
128 return ERR_PTR(retval);
129
130}
131
89struct inode * 132struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, 133v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb) 134 struct super_block *sb)
92{ 135{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st; 136 struct p9_stat_dotl *st;
137 struct inode *inode = NULL;
96 138
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 139 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st)) 140 if (IS_ERR(st))
99 return ERR_CAST(st); 141 return ERR_CAST(st);
100 142
101 ret = v9fs_get_inode(sb, st->st_mode); 143 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st);
119 return ret;
120error:
121 kfree(st); 144 kfree(st);
122 return ERR_PTR(err); 145 return inode;
123} 146}
124 147
125/** 148/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd) 159 struct nameidata *nd)
137{ 160{
138 int err = 0; 161 int err = 0;
139 char *name = NULL;
140 gid_t gid; 162 gid_t gid;
141 int flags; 163 int flags;
142 mode_t mode; 164 mode_t mode;
143 struct v9fs_session_info *v9ses; 165 char *name = NULL;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp; 166 struct file *filp;
147 struct p9_qid qid; 167 struct p9_qid qid;
148 struct inode *inode; 168 struct inode *inode;
169 struct p9_fid *fid = NULL;
170 struct v9fs_inode *v9inode;
171 struct p9_fid *dfid, *ofid, *inode_fid;
172 struct v9fs_session_info *v9ses;
149 struct posix_acl *pacl = NULL, *dacl = NULL; 173 struct posix_acl *pacl = NULL, *dacl = NULL;
150 174
151 v9ses = v9fs_inode2v9ses(dir); 175 v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
196 err); 220 err);
197 goto error; 221 goto error;
198 } 222 }
223 v9fs_invalidate_inode_attr(dir);
199 224
200 /* instantiate inode and assign the unopened fid to the dentry */ 225 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1); 226 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
205 fid = NULL; 230 fid = NULL;
206 goto error; 231 goto error;
207 } 232 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 233 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) { 234 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode); 235 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 236 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
219 /* Now set the ACL based on the default value */ 244 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl); 245 v9fs_set_create_acl(dentry, dacl, pacl);
221 246
247 v9inode = V9FS_I(inode);
248 if (v9ses->cache && !v9inode->writeback_fid) {
249 /*
250 * clone a fid and add it to writeback_fid
251 * we do it during open time instead of
252 * page dirty time via write_begin/page_mkwrite
253 * because we want write after unlink usecase
254 * to work.
255 */
256 inode_fid = v9fs_writeback_fid(dentry);
257 if (IS_ERR(inode_fid)) {
258 err = PTR_ERR(inode_fid);
259 goto error;
260 }
261 v9inode->writeback_fid = (void *) inode_fid;
262 }
222 /* Since we are opening a file, assign the open fid to the file */ 263 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 264 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) { 265 if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
226 return PTR_ERR(filp); 267 return PTR_ERR(filp);
227 } 268 }
228 filp->private_data = ofid; 269 filp->private_data = ofid;
270#ifdef CONFIG_9P_FSCACHE
271 if (v9ses->cache)
272 v9fs_cache_inode_set_cookie(inode, filp);
273#endif
229 return 0; 274 return 0;
230 275
231error: 276error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
300 goto error; 345 goto error;
301 } 346 }
302 347
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 348 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) { 349 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode); 350 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 351 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
327 } 372 }
328 /* Now set the ACL based on the default value */ 373 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl); 374 v9fs_set_create_acl(dentry, dacl, pacl);
330 375 inc_nlink(dir);
376 v9fs_invalidate_inode_attr(dir);
331error: 377error:
332 if (fid) 378 if (fid)
333 p9_client_clunk(fid); 379 p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 392 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM; 393 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode); 394 v9ses = v9fs_inode2v9ses(dentry->d_inode);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 395 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
350 return simple_getattr(mnt, dentry, stat); 396 generic_fillattr(dentry->d_inode, stat);
351 397 return 0;
398 }
352 fid = v9fs_fid_lookup(dentry); 399 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid)) 400 if (IS_ERR(fid))
354 return PTR_ERR(fid); 401 return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
406 if (IS_ERR(fid)) 453 if (IS_ERR(fid))
407 return PTR_ERR(fid); 454 return PTR_ERR(fid);
408 455
409 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0)
411 return retval;
412
413 if ((iattr->ia_valid & ATTR_SIZE) && 456 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) { 457 iattr->ia_size != i_size_read(dentry->d_inode)) {
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 458 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
416 if (retval) 459 if (retval)
417 return retval; 460 return retval;
418 } 461 }
462 /* Write all dirty data */
463 if (S_ISREG(dentry->d_inode->i_mode))
464 filemap_write_and_wait(dentry->d_inode->i_mapping);
465
466 retval = p9_client_setattr(fid, &p9attr);
467 if (retval < 0)
468 return retval;
469 v9fs_invalidate_inode_attr(dentry->d_inode);
419 470
420 setattr_copy(dentry->d_inode, iattr); 471 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode); 472 mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
439void 490void
440v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 491v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
441{ 492{
493 struct v9fs_inode *v9inode = V9FS_I(inode);
442 494
443 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 495 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
444 inode->i_atime.tv_sec = stat->st_atime_sec; 496 inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
497 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION 549 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
498 * because the inode structure does not have fields for them. 550 * because the inode structure does not have fields for them.
499 */ 551 */
552 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
500} 553}
501 554
502static int 555static int
503v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, 556v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
504 const char *symname) 557 const char *symname)
505{ 558{
506 struct v9fs_session_info *v9ses;
507 struct p9_fid *dfid;
508 struct p9_fid *fid = NULL;
509 struct inode *inode;
510 struct p9_qid qid;
511 char *name;
512 int err; 559 int err;
513 gid_t gid; 560 gid_t gid;
561 char *name;
562 struct p9_qid qid;
563 struct inode *inode;
564 struct p9_fid *dfid;
565 struct p9_fid *fid = NULL;
566 struct v9fs_session_info *v9ses;
514 567
515 name = (char *) dentry->d_name.name; 568 name = (char *) dentry->d_name.name;
516 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", 569 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
534 goto error; 587 goto error;
535 } 588 }
536 589
590 v9fs_invalidate_inode_attr(dir);
537 if (v9ses->cache) { 591 if (v9ses->cache) {
538 /* Now walk from the parent so we can get an unopened fid. */ 592 /* Now walk from the parent so we can get an unopened fid. */
539 fid = p9_client_walk(dfid, 1, &name, 1); 593 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
546 } 600 }
547 601
548 /* instantiate inode and assign the unopened fid to dentry */ 602 /* instantiate inode and assign the unopened fid to dentry */
549 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 603 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
550 if (IS_ERR(inode)) { 604 if (IS_ERR(inode)) {
551 err = PTR_ERR(inode); 605 err = PTR_ERR(inode);
552 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 606 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
588 struct dentry *dentry) 642 struct dentry *dentry)
589{ 643{
590 int err; 644 int err;
591 struct p9_fid *dfid, *oldfid;
592 char *name; 645 char *name;
593 struct v9fs_session_info *v9ses;
594 struct dentry *dir_dentry; 646 struct dentry *dir_dentry;
647 struct p9_fid *dfid, *oldfid;
648 struct v9fs_session_info *v9ses;
595 649
596 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", 650 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
597 dir->i_ino, old_dentry->d_name.name, 651 dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
616 return err; 670 return err;
617 } 671 }
618 672
673 v9fs_invalidate_inode_attr(dir);
619 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 674 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
620 /* Get the latest stat info from server. */ 675 /* Get the latest stat info from server. */
621 struct p9_fid *fid; 676 struct p9_fid *fid;
622 struct p9_stat_dotl *st;
623
624 fid = v9fs_fid_lookup(old_dentry); 677 fid = v9fs_fid_lookup(old_dentry);
625 if (IS_ERR(fid)) 678 if (IS_ERR(fid))
626 return PTR_ERR(fid); 679 return PTR_ERR(fid);
627 680
628 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 681 v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
629 if (IS_ERR(st))
630 return PTR_ERR(st);
631
632 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
633
634 kfree(st);
635 } else {
636 /* Caching disabled. No need to get upto date stat info.
637 * This dentry will be released immediately. So, just hold the
638 * inode
639 */
640 ihold(old_dentry->d_inode);
641 } 682 }
683 ihold(old_dentry->d_inode);
642 d_instantiate(dentry, old_dentry->d_inode); 684 d_instantiate(dentry, old_dentry->d_inode);
643 685
644 return err; 686 return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
657 dev_t rdev) 699 dev_t rdev)
658{ 700{
659 int err; 701 int err;
702 gid_t gid;
660 char *name; 703 char *name;
661 mode_t mode; 704 mode_t mode;
662 struct v9fs_session_info *v9ses; 705 struct v9fs_session_info *v9ses;
663 struct p9_fid *fid = NULL, *dfid = NULL; 706 struct p9_fid *fid = NULL, *dfid = NULL;
664 struct inode *inode; 707 struct inode *inode;
665 gid_t gid;
666 struct p9_qid qid; 708 struct p9_qid qid;
667 struct dentry *dir_dentry; 709 struct dentry *dir_dentry;
668 struct posix_acl *dacl = NULL, *pacl = NULL; 710 struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
699 if (err < 0) 741 if (err < 0)
700 goto error; 742 goto error;
701 743
744 v9fs_invalidate_inode_attr(dir);
702 /* instantiate inode and assign the unopened fid to the dentry */ 745 /* instantiate inode and assign the unopened fid to the dentry */
703 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 746 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
704 fid = p9_client_walk(dfid, 1, &name, 1); 747 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
710 goto error; 753 goto error;
711 } 754 }
712 755
713 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 756 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
714 if (IS_ERR(inode)) { 757 if (IS_ERR(inode)) {
715 err = PTR_ERR(inode); 758 err = PTR_ERR(inode);
716 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 759 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
782 return NULL; 825 return NULL;
783} 826}
784 827
828int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
829{
830 loff_t i_size;
831 struct p9_stat_dotl *st;
832 struct v9fs_session_info *v9ses;
833
834 v9ses = v9fs_inode2v9ses(inode);
835 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
836 if (IS_ERR(st))
837 return PTR_ERR(st);
838
839 spin_lock(&inode->i_lock);
840 /*
841 * We don't want to refresh inode->i_size,
842 * because we may have cached data
843 */
844 i_size = inode->i_size;
845 v9fs_stat2inode_dotl(st, inode);
846 if (v9ses->cache)
847 inode->i_size = i_size;
848 spin_unlock(&inode->i_lock);
849 kfree(st);
850 return 0;
851}
852
785const struct inode_operations v9fs_dir_inode_operations_dotl = { 853const struct inode_operations v9fs_dir_inode_operations_dotl = {
786 .create = v9fs_vfs_create_dotl, 854 .create = v9fs_vfs_create_dotl,
787 .lookup = v9fs_vfs_lookup, 855 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b813..09fd08d1606 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 } else 86 } else
87 sb->s_op = &v9fs_super_ops; 87 sb->s_op = &v9fs_super_ops;
88 sb->s_bdi = &v9ses->bdi; 88 sb->s_bdi = &v9ses->bdi;
89 if (v9ses->cache)
90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
89 91
90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
91 MS_NOATIME; 93 if (!v9ses->cache)
94 sb->s_flags |= MS_SYNCHRONOUS;
92 95
93#ifdef CONFIG_9P_FS_POSIX_ACL 96#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT) 97 if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
95 sb->s_flags |= MS_POSIXACL; 98 sb->s_flags |= MS_POSIXACL;
96#endif 99#endif
97 100
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
151 retval = PTR_ERR(inode); 154 retval = PTR_ERR(inode);
152 goto release_sb; 155 goto release_sb;
153 } 156 }
154
155 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
156 if (!root) { 158 if (!root) {
157 iput(inode); 159 iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
166 retval = PTR_ERR(st); 168 retval = PTR_ERR(st);
167 goto release_sb; 169 goto release_sb;
168 } 170 }
169 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
170 v9fs_stat2inode_dotl(st, root->d_inode); 172 v9fs_stat2inode_dotl(st, root->d_inode);
171 kfree(st); 173 kfree(st);
172 } else { 174 } else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
183 p9stat_free(st); 185 p9stat_free(st);
184 kfree(st); 186 kfree(st);
185 } 187 }
188 v9fs_fid_add(root, fid);
186 retval = v9fs_get_acl(inode, fid); 189 retval = v9fs_get_acl(inode, fid);
187 if (retval) 190 if (retval)
188 goto release_sb; 191 goto release_sb;
189 v9fs_fid_add(root, fid); 192 /*
193 * Add the root fid to session info. This is used
194 * for file system sync. We want a cloned fid here
195 * so that we can do a sync_filesystem after a
196 * shrink_dcache_for_umount
197 */
198 v9ses->root_fid = v9fs_fid_clone(root);
199 if (IS_ERR(v9ses->root_fid)) {
200 retval = PTR_ERR(v9ses->root_fid);
201 goto release_sb;
202 }
190 203
191 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 204 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
192 return dget(sb->s_root); 205 return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
197 v9fs_session_close(v9ses); 210 v9fs_session_close(v9ses);
198 kfree(v9ses); 211 kfree(v9ses);
199 return ERR_PTR(retval); 212 return ERR_PTR(retval);
200
201release_sb: 213release_sb:
202 /* 214 /*
203 * we will do the session_close and root dentry release 215 * we will do the session_close and root dentry
204 * in the below call. But we need to clunk fid, because we haven't 216 * release in the below call.
205 * attached the fid to dentry so it won't get clunked
206 * automatically.
207 */ 217 */
208 p9_client_clunk(fid);
209 deactivate_locked_super(sb); 218 deactivate_locked_super(sb);
210 return ERR_PTR(retval); 219 return ERR_PTR(retval);
211} 220}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
223 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 232 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
224 233
225 kill_anon_super(s); 234 kill_anon_super(s);
226 235 p9_client_clunk(v9ses->root_fid);
227 v9fs_session_cancel(v9ses); 236 v9fs_session_cancel(v9ses);
228 v9fs_session_close(v9ses); 237 v9fs_session_close(v9ses);
229 kfree(v9ses); 238 kfree(v9ses);
@@ -276,11 +285,31 @@ done:
276 return res; 285 return res;
277} 286}
278 287
288static int v9fs_sync_fs(struct super_block *sb, int wait)
289{
290 struct v9fs_session_info *v9ses = sb->s_fs_info;
291
292 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
293 return p9_client_sync_fs(v9ses->root_fid);
294}
295
296static int v9fs_drop_inode(struct inode *inode)
297{
298 struct v9fs_session_info *v9ses;
299 v9ses = v9fs_inode2v9ses(inode);
300 if (v9ses->cache)
301 return generic_drop_inode(inode);
302 /*
303 * in case of non cached mode always drop the
304 * the inode because we want the inode attribute
305 * to always match that on the server.
306 */
307 return 1;
308}
309
279static const struct super_operations v9fs_super_ops = { 310static const struct super_operations v9fs_super_ops = {
280#ifdef CONFIG_9P_FSCACHE
281 .alloc_inode = v9fs_alloc_inode, 311 .alloc_inode = v9fs_alloc_inode,
282 .destroy_inode = v9fs_destroy_inode, 312 .destroy_inode = v9fs_destroy_inode,
283#endif
284 .statfs = simple_statfs, 313 .statfs = simple_statfs,
285 .evict_inode = v9fs_evict_inode, 314 .evict_inode = v9fs_evict_inode,
286 .show_options = generic_show_options, 315 .show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
288}; 317};
289 318
290static const struct super_operations v9fs_super_ops_dotl = { 319static const struct super_operations v9fs_super_ops_dotl = {
291#ifdef CONFIG_9P_FSCACHE
292 .alloc_inode = v9fs_alloc_inode, 320 .alloc_inode = v9fs_alloc_inode,
293 .destroy_inode = v9fs_destroy_inode, 321 .destroy_inode = v9fs_destroy_inode,
294#endif 322 .sync_fs = v9fs_sync_fs,
295 .statfs = v9fs_statfs, 323 .statfs = v9fs_statfs,
324 .drop_inode = v9fs_drop_inode,
296 .evict_inode = v9fs_evict_inode, 325 .evict_inode = v9fs_evict_inode,
297 .show_options = generic_show_options, 326 .show_options = generic_show_options,
298 .umount_begin = v9fs_umount_begin, 327 .umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
303 .mount = v9fs_mount, 332 .mount = v9fs_mount,
304 .kill_sb = v9fs_kill_super, 333 .kill_sb = v9fs_kill_super,
305 .owner = THIS_MODULE, 334 .owner = THIS_MODULE,
306 .fs_flags = FS_RENAME_DOES_D_MOVE, 335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
307}; 336};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57ed..7cb53aafac1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c..ba01202844c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/aio.c b/fs/aio.c
index 26869cde395..7f54f43b8f7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 87
88 aio_wq = create_workqueue("aio"); 88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
90 BUG_ON(!aio_wq || !abe_pool); 90 BUG_ON(!aio_wq || !abe_pool);
91 91
@@ -577,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
577 spin_lock(&fput_lock); 577 spin_lock(&fput_lock);
578 list_add(&req->ki_list, &fput_head); 578 list_add(&req->ki_list, &fput_head);
579 spin_unlock(&fput_lock); 579 spin_unlock(&fput_lock);
580 queue_work(aio_wq, &fput_work); 580 schedule_work(&fput_work);
581 } else { 581 } else {
582 req->ki_filp = NULL; 582 req->ki_filp = NULL;
583 really_put_req(ctx, req); 583 really_put_req(ctx, req);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b..b4ffad859ad 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93323ac26b0..512c3d1da08 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4807,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4807 int err; 4807 int err;
4808 int drop_inode = 0; 4808 int drop_inode = 0;
4809 4809
4810 if (inode->i_nlink == 0)
4811 return -ENOENT;
4812
4813 /* do not allow sys_link's with other subvols of the same device */ 4810 /* do not allow sys_link's with other subvols of the same device */
4814 if (root->objectid != BTRFS_I(inode)->root->objectid) 4811 if (root->objectid != BTRFS_I(inode)->root->objectid)
4815 return -EPERM; 4812 return -EPERM;
diff --git a/fs/compat.c b/fs/compat.c
index 691c3fd8ce1..c6d31a3bab8 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -2312,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2312} 2284}
2313 2285
2314#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 611ffe928c0..a39fe47c466 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
296 __releases(parent->d_lock) 296 __releases(parent->d_lock)
297 __releases(dentry->d_inode->i_lock) 297 __releases(dentry->d_inode->i_lock)
298{ 298{
299 dentry->d_parent = NULL;
300 list_del(&dentry->d_u.d_child); 299 list_del(&dentry->d_u.d_child);
300 /*
301 * Inform try_to_ascend() that we are no longer attached to the
302 * dentry tree
303 */
304 dentry->d_flags |= DCACHE_DISCONNECTED;
301 if (parent) 305 if (parent)
302 spin_unlock(&parent->d_lock); 306 spin_unlock(&parent->d_lock);
303 dentry_iput(dentry); 307 dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
1066 * All done at this level ... ascend and resume the search. 1099 * All done at this level ... ascend and resume the search.
1067 */ 1100 */
1068 if (this_parent != parent) { 1101 if (this_parent != parent) {
1069 struct dentry *tmp; 1102 struct dentry *child = this_parent;
1070 struct dentry *child; 1103 this_parent = try_to_ascend(this_parent, locked, seq);
1071 1104 if (!this_parent)
1072 tmp = this_parent->d_parent;
1073 rcu_read_lock();
1074 spin_unlock(&this_parent->d_lock);
1075 child = this_parent;
1076 this_parent = tmp;
1077 spin_lock(&this_parent->d_lock);
1078 /* might go back up the wrong parent if we have had a rename
1079 * or deletion */
1080 if (this_parent != child->d_parent ||
1081 (!locked && read_seqretry(&rename_lock, seq))) {
1082 spin_unlock(&this_parent->d_lock);
1083 rcu_read_unlock();
1084 goto rename_retry; 1105 goto rename_retry;
1085 }
1086 rcu_read_unlock();
1087 next = child->d_u.d_child.next; 1106 next = child->d_u.d_child.next;
1088 goto resume; 1107 goto resume;
1089 } 1108 }
@@ -1181,24 +1200,10 @@ resume:
1181 * All done at this level ... ascend and resume the search. 1200 * All done at this level ... ascend and resume the search.
1182 */ 1201 */
1183 if (this_parent != parent) { 1202 if (this_parent != parent) {
1184 struct dentry *tmp; 1203 struct dentry *child = this_parent;
1185 struct dentry *child; 1204 this_parent = try_to_ascend(this_parent, locked, seq);
1186 1205 if (!this_parent)
1187 tmp = this_parent->d_parent;
1188 rcu_read_lock();
1189 spin_unlock(&this_parent->d_lock);
1190 child = this_parent;
1191 this_parent = tmp;
1192 spin_lock(&this_parent->d_lock);
1193 /* might go back up the wrong parent if we have had a rename
1194 * or deletion */
1195 if (this_parent != child->d_parent ||
1196 (!locked && read_seqretry(&rename_lock, seq))) {
1197 spin_unlock(&this_parent->d_lock);
1198 rcu_read_unlock();
1199 goto rename_retry; 1206 goto rename_retry;
1200 }
1201 rcu_read_unlock();
1202 next = child->d_u.d_child.next; 1207 next = child->d_u.d_child.next;
1203 goto resume; 1208 goto resume;
1204 } 1209 }
@@ -2942,28 +2947,14 @@ resume:
2942 spin_unlock(&dentry->d_lock); 2947 spin_unlock(&dentry->d_lock);
2943 } 2948 }
2944 if (this_parent != root) { 2949 if (this_parent != root) {
2945 struct dentry *tmp; 2950 struct dentry *child = this_parent;
2946 struct dentry *child;
2947
2948 tmp = this_parent->d_parent;
2949 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2951 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2950 this_parent->d_flags |= DCACHE_GENOCIDE; 2952 this_parent->d_flags |= DCACHE_GENOCIDE;
2951 this_parent->d_count--; 2953 this_parent->d_count--;
2952 } 2954 }
2953 rcu_read_lock(); 2955 this_parent = try_to_ascend(this_parent, locked, seq);
2954 spin_unlock(&this_parent->d_lock); 2956 if (!this_parent)
2955 child = this_parent;
2956 this_parent = tmp;
2957 spin_lock(&this_parent->d_lock);
2958 /* might go back up the wrong parent if we have had a rename
2959 * or deletion */
2960 if (this_parent != child->d_parent ||
2961 (!locked && read_seqretry(&rename_lock, seq))) {
2962 spin_unlock(&this_parent->d_lock);
2963 rcu_read_unlock();
2964 goto rename_retry; 2957 goto rename_retry;
2965 }
2966 rcu_read_unlock();
2967 next = child->d_u.d_child.next; 2958 next = child->d_u.d_child.next;
2968 goto resume; 2959 goto resume;
2969 } 2960 }
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..ba99e1abb1a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd..b05acb79613 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 9dba3bd69d9..0521a007ae6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f247..9cc19a1dea8 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c..e781b7ea563 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b..203f9e4a70b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3509 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3511 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3510 3512
3511no_journal: 3513no_journal:
3512 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3514 /*
3515 * The maximum number of concurrent works can be high and
3516 * concurrency isn't really necessary. Limit it to 1.
3517 */
3518 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
3513 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3520 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3514 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3515 goto failed_mount_wq; 3522 goto failed_mount_wq;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd..0e277ec4b61 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bd..6c82e5bac03 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 00000000000..bf93ad2bee0
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need t make sure wether the file system
24 * support decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow
56 * But file system returned 255 always. So handle
57 * both the values
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_size indicate the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With handle we don't look at the execute bit on the
175 * the directory. Ideally we would like CAP_DAC_SEARCH.
176 * But we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flag: open flags.
248 *
249 * @mountdirfd indicate the directory file descriptor
250 * of the mount point. file handle is decoded relative
251 * to the vfsmount pointed by the @mountdirfd. @flags
252 * value is same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index cbeec70ee31..bfab973c6c5 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -279,11 +279,10 @@ struct file *fget(unsigned int fd)
279 rcu_read_lock(); 279 rcu_read_lock();
280 file = fcheck_files(files, fd); 280 file = fcheck_files(files, fd);
281 if (file) { 281 if (file) {
282 if (!atomic_long_inc_not_zero(&file->f_count)) { 282 /* File object ref couldn't be taken */
283 /* File object ref couldn't be taken */ 283 if (file->f_mode & FMODE_PATH ||
284 rcu_read_unlock(); 284 !atomic_long_inc_not_zero(&file->f_count))
285 return NULL; 285 file = NULL;
286 }
287 } 286 }
288 rcu_read_unlock(); 287 rcu_read_unlock();
289 288
@@ -292,6 +291,25 @@ struct file *fget(unsigned int fd)
292 291
293EXPORT_SYMBOL(fget); 292EXPORT_SYMBOL(fget);
294 293
294struct file *fget_raw(unsigned int fd)
295{
296 struct file *file;
297 struct files_struct *files = current->files;
298
299 rcu_read_lock();
300 file = fcheck_files(files, fd);
301 if (file) {
302 /* File object ref couldn't be taken */
303 if (!atomic_long_inc_not_zero(&file->f_count))
304 file = NULL;
305 }
306 rcu_read_unlock();
307
308 return file;
309}
310
311EXPORT_SYMBOL(fget_raw);
312
295/* 313/*
296 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 314 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
297 * 315 *
@@ -316,6 +334,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
316 *fput_needed = 0; 334 *fput_needed = 0;
317 if (atomic_read(&files->count) == 1) { 335 if (atomic_read(&files->count) == 1) {
318 file = fcheck_files(files, fd); 336 file = fcheck_files(files, fd);
337 if (file && (file->f_mode & FMODE_PATH))
338 file = NULL;
339 } else {
340 rcu_read_lock();
341 file = fcheck_files(files, fd);
342 if (file) {
343 if (!(file->f_mode & FMODE_PATH) &&
344 atomic_long_inc_not_zero(&file->f_count))
345 *fput_needed = 1;
346 else
347 /* Didn't get the reference, someone's freed */
348 file = NULL;
349 }
350 rcu_read_unlock();
351 }
352
353 return file;
354}
355
356struct file *fget_raw_light(unsigned int fd, int *fput_needed)
357{
358 struct file *file;
359 struct files_struct *files = current->files;
360
361 *fput_needed = 0;
362 if (atomic_read(&files->count) == 1) {
363 file = fcheck_files(files, fd);
319 } else { 364 } else {
320 rcu_read_lock(); 365 rcu_read_lock();
321 file = fcheck_files(files, fd); 366 file = fcheck_files(files, fd);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd..051b1a08452 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a..cbc07155b1a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) 83 if (flags & IPERM_FLAG_RCU) {
84 return -ECHILD; 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD;
86 return -EAGAIN;
87 }
85 88
86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 89 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
87 if (IS_ERR(acl)) 90 if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9..aad77e4f61b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
695 if (error == 0) 695 if (error == 0)
696 return 0; 696 return 0;
697 697
698 unlock_page(page);
698 page_cache_release(page); 699 page_cache_release(page);
699 700
700 gfs2_trans_end(sdp); 701 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef..ef3dc4b9fae 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
21#include "meta_io.h" 21#include "meta_io.h"
22#include "quota.h" 22#include "quota.h"
23#include "rgrp.h" 23#include "rgrp.h"
24#include "super.h"
24#include "trans.h" 25#include "trans.h"
25#include "dir.h" 26#include "dir.h"
26#include "util.h" 27#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
757 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 758 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
758 struct gfs2_rgrp_list rlist; 759 struct gfs2_rgrp_list rlist;
759 u64 bn, bstart; 760 u64 bn, bstart;
760 u32 blen; 761 u32 blen, btotal;
761 __be64 *p; 762 __be64 *p;
762 unsigned int rg_blocks = 0; 763 unsigned int rg_blocks = 0;
763 int metadata; 764 int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
839 840
840 bstart = 0; 841 bstart = 0;
841 blen = 0; 842 blen = 0;
843 btotal = 0;
842 844
843 for (p = top; p < bottom; p++) { 845 for (p = top; p < bottom; p++) {
844 if (!*p) 846 if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
851 else { 853 else {
852 if (bstart) { 854 if (bstart) {
853 if (metadata) 855 if (metadata)
854 gfs2_free_meta(ip, bstart, blen); 856 __gfs2_free_meta(ip, bstart, blen);
855 else 857 else
856 gfs2_free_data(ip, bstart, blen); 858 __gfs2_free_data(ip, bstart, blen);
859
860 btotal += blen;
857 } 861 }
858 862
859 bstart = bn; 863 bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
865 } 869 }
866 if (bstart) { 870 if (bstart) {
867 if (metadata) 871 if (metadata)
868 gfs2_free_meta(ip, bstart, blen); 872 __gfs2_free_meta(ip, bstart, blen);
869 else 873 else
870 gfs2_free_data(ip, bstart, blen); 874 __gfs2_free_data(ip, bstart, blen);
875
876 btotal += blen;
871 } 877 }
872 878
879 gfs2_statfs_change(sdp, 0, +btotal, 0);
880 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
881 ip->i_inode.i_gid);
882
873 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 883 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
874 884
875 gfs2_dinode_out(ip, dibh->b_data); 885 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f..b5a5e60df0d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb91336..4074b952b05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
448{ 448{
449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
450 450
451 if (!(file->f_flags & O_NOATIME)) { 451 if (!(file->f_flags & O_NOATIME) &&
452 !IS_NOATIME(&ip->i_inode)) {
452 struct gfs2_holder i_gh; 453 struct gfs2_holder i_gh;
453 int error; 454 int error;
454 455
455 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 456 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
456 error = gfs2_glock_nq(&i_gh); 457 error = gfs2_glock_nq(&i_gh);
457 file_accessed(file); 458 if (error == 0) {
458 if (error == 0) 459 file_accessed(file);
459 gfs2_glock_dq_uninit(&i_gh); 460 gfs2_glock_dq(&i_gh);
461 }
462 gfs2_holder_uninit(&i_gh);
463 if (error)
464 return error;
460 } 465 }
461 vma->vm_ops = &gfs2_vm_ops; 466 vma->vm_ops = &gfs2_vm_ops;
462 vma->vm_flags |= VM_CAN_NONLINEAR; 467 vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
617{ 622{
618 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
619 624
620 page_zero_new_buffers(page, from, to); 625 zero_user(page, from, to-from);
621 flush_dcache_page(page);
622 mark_page_accessed(page); 626 mark_page_accessed(page);
623 627
624 if (!gfs2_is_writeback(ip)) 628 if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
627 block_commit_write(page, from, to); 631 block_commit_write(page, from, to);
628} 632}
629 633
630static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 634static int needs_empty_write(sector_t block, struct inode *inode)
631{ 635{
632 unsigned start, end, next;
633 struct buffer_head *bh, *head;
634 int error; 636 int error;
637 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
635 638
636 if (!page_has_buffers(page)) { 639 bh_map.b_size = 1 << inode->i_blkbits;
637 error = __block_write_begin(page, from, to - from, gfs2_block_map); 640 error = gfs2_block_map(inode, block, &bh_map, 0);
638 if (unlikely(error)) 641 if (unlikely(error))
639 return error; 642 return error;
643 return !buffer_mapped(&bh_map);
644}
640 645
641 empty_write_end(page, from, to); 646static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
642 return 0; 647{
643 } 648 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize;
650 sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
651 int ret;
644 652
645 bh = head = page_buffers(page); 653 blksize = 1 << inode->i_blkbits;
646 next = end = 0; 654 next = end = 0;
647 while (next < from) { 655 while (next < from) {
648 next += bh->b_size; 656 next += blksize;
649 bh = bh->b_this_page; 657 block++;
650 } 658 }
651 start = next; 659 start = next;
652 do { 660 do {
653 next += bh->b_size; 661 next += blksize;
654 if (buffer_mapped(bh)) { 662 ret = needs_empty_write(block, inode);
663 if (unlikely(ret < 0))
664 return ret;
665 if (ret == 0) {
655 if (end) { 666 if (end) {
656 error = __block_write_begin(page, start, end - start, 667 ret = __block_write_begin(page, start, end - start,
657 gfs2_block_map); 668 gfs2_block_map);
658 if (unlikely(error)) 669 if (unlikely(ret))
659 return error; 670 return ret;
660 empty_write_end(page, start, end); 671 empty_write_end(page, start, end);
661 end = 0; 672 end = 0;
662 } 673 }
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
664 } 675 }
665 else 676 else
666 end = next; 677 end = next;
667 bh = bh->b_this_page; 678 block++;
668 } while (next < to); 679 } while (next < to);
669 680
670 if (end) { 681 if (end) {
671 error = __block_write_begin(page, start, end - start, gfs2_block_map); 682 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
672 if (unlikely(error)) 683 if (unlikely(ret))
673 return error; 684 return ret;
674 empty_write_end(page, start, end); 685 empty_write_end(page, start, end);
675 } 686 }
676 687
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
976 987
977 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
978 flock_lock_file_wait(file, fl); 989 flock_lock_file_wait(file, fl);
979 if (fl_gh->gh_gl) 990 if (fl_gh->gh_gl) {
980 gfs2_glock_dq_uninit(fl_gh); 991 gfs2_glock_dq_wait(fl_gh);
992 gfs2_holder_uninit(fl_gh);
993 }
981 mutex_unlock(&fp->f_fl_mutex); 994 mutex_unlock(&fp->f_fl_mutex);
982} 995}
983 996
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7cd9a5a68d5..e2431313491 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/jiffies.h> 28#include <linux/jiffies.h>
29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h>
29 32
30#include "gfs2.h" 33#include "gfs2.h"
31#include "incore.h" 34#include "incore.h"
@@ -41,10 +44,6 @@
41#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
42#include "trace_gfs2.h" 45#include "trace_gfs2.h"
43 46
44struct gfs2_gl_hash_bucket {
45 struct hlist_head hb_list;
46};
47
48struct gfs2_glock_iter { 47struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 48 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 49 struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
54 53
55typedef void (*glock_examiner) (struct gfs2_glock * gl); 54typedef void (*glock_examiner) (struct gfs2_glock * gl);
56 55
57static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 56static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 57#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 58static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
70#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 68#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
71#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) 69#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
72 70
73static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; 71static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
74static struct dentry *gfs2_root; 72static struct dentry *gfs2_root;
75 73
76/*
77 * Despite what you might think, the numbers below are not arbitrary :-)
78 * They are taken from the ipv4 routing hash code, which is well tested
79 * and thus should be nearly optimal. Later on we might tweek the numbers
80 * but for now this should be fine.
81 *
82 * The reason for putting the locks in a separate array from the list heads
83 * is that we can have fewer locks than list heads and save memory. We use
84 * the same hash function for both, but with a different hash mask.
85 */
86#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
87 defined(CONFIG_PROVE_LOCKING)
88
89#ifdef CONFIG_LOCKDEP
90# define GL_HASH_LOCK_SZ 256
91#else
92# if NR_CPUS >= 32
93# define GL_HASH_LOCK_SZ 4096
94# elif NR_CPUS >= 16
95# define GL_HASH_LOCK_SZ 2048
96# elif NR_CPUS >= 8
97# define GL_HASH_LOCK_SZ 1024
98# elif NR_CPUS >= 4
99# define GL_HASH_LOCK_SZ 512
100# else
101# define GL_HASH_LOCK_SZ 256
102# endif
103#endif
104
105/* We never want more locks than chains */
106#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
107# undef GL_HASH_LOCK_SZ
108# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
109#endif
110
111static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
112
113static inline rwlock_t *gl_lock_addr(unsigned int x)
114{
115 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
116}
117#else /* not SMP, so no spinlocks required */
118static inline rwlock_t *gl_lock_addr(unsigned int x)
119{
120 return NULL;
121}
122#endif
123
124/** 74/**
125 * gl_hash() - Turn glock number into hash bucket number 75 * gl_hash() - Turn glock number into hash bucket number
126 * @lock: The glock number 76 * @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
141 return h; 91 return h;
142} 92}
143 93
144/** 94static inline void spin_lock_bucket(unsigned int hash)
145 * glock_free() - Perform a few checks and then release struct gfs2_glock 95{
146 * @gl: The glock to release 96 struct hlist_bl_head *bl = &gl_hash_table[hash];
147 * 97 bit_spin_lock(0, (unsigned long *)bl);
148 * Also calls lock module to release its internal structure for this glock. 98}
149 *
150 */
151 99
152static void glock_free(struct gfs2_glock *gl) 100static inline void spin_unlock_bucket(unsigned int hash)
101{
102 struct hlist_bl_head *bl = &gl_hash_table[hash];
103 __bit_spin_unlock(0, (unsigned long *)bl);
104}
105
106static void gfs2_glock_dealloc(struct rcu_head *rcu)
107{
108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
109
110 if (gl->gl_ops->go_flags & GLOF_ASPACE)
111 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
112 else
113 kmem_cache_free(gfs2_glock_cachep, gl);
114}
115
116void gfs2_glock_free(struct gfs2_glock *gl)
153{ 117{
154 struct gfs2_sbd *sdp = gl->gl_sbd; 118 struct gfs2_sbd *sdp = gl->gl_sbd;
155 struct address_space *mapping = gfs2_glock2aspace(gl);
156 struct kmem_cache *cachep = gfs2_glock_cachep;
157 119
158 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 120 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
159 trace_gfs2_glock_put(gl); 121 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
160 if (mapping) 122 wake_up(&sdp->sd_glock_wait);
161 cachep = gfs2_glock_aspace_cachep;
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
163} 123}
164 124
165/** 125/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
185{ 145{
186 const struct gfs2_glock_operations *glops = gl->gl_ops; 146 const struct gfs2_glock_operations *glops = gl->gl_ops;
187 147
148 /* assert_spin_locked(&gl->gl_spin); */
149
188 if (gl->gl_state == LM_ST_UNLOCKED) 150 if (gl->gl_state == LM_ST_UNLOCKED)
189 return 0; 151 return 0;
190 if (!list_empty(&gl->gl_holders)) 152 if (test_bit(GLF_LFLUSH, &gl->gl_flags))
153 return 0;
154 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
155 !list_empty(&gl->gl_holders))
191 return 0; 156 return 0;
192 if (glops->go_demote_ok) 157 if (glops->go_demote_ok)
193 return glops->go_demote_ok(gl); 158 return glops->go_demote_ok(gl);
194 return 1; 159 return 1;
195} 160}
196 161
162
197/** 163/**
198 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 164 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
199 * @gl: the glock 165 * @gl: the glock
200 * 166 *
167 * If the glock is demotable, then we add it (or move it) to the end
168 * of the glock LRU list.
201 */ 169 */
202 170
203static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 171static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
204{ 172{
205 int may_reclaim; 173 if (demote_ok(gl)) {
206 may_reclaim = (demote_ok(gl) && 174 spin_lock(&lru_lock);
207 (atomic_read(&gl->gl_ref) == 1 || 175
208 (gl->gl_name.ln_type == LM_TYPE_INODE && 176 if (!list_empty(&gl->gl_lru))
209 atomic_read(&gl->gl_ref) <= 2))); 177 list_del_init(&gl->gl_lru);
210 spin_lock(&lru_lock); 178 else
211 if (list_empty(&gl->gl_lru) && may_reclaim) { 179 atomic_inc(&lru_count);
180
212 list_add_tail(&gl->gl_lru, &lru_list); 181 list_add_tail(&gl->gl_lru, &lru_list);
213 atomic_inc(&lru_count); 182 spin_unlock(&lru_lock);
214 } 183 }
215 spin_unlock(&lru_lock); 184}
185
186void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
187{
188 spin_lock(&gl->gl_spin);
189 __gfs2_glock_schedule_for_reclaim(gl);
190 spin_unlock(&gl->gl_spin);
216} 191}
217 192
218/** 193/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
227{ 202{
228 if (atomic_dec_and_test(&gl->gl_ref)) 203 if (atomic_dec_and_test(&gl->gl_ref))
229 GLOCK_BUG_ON(gl, 1); 204 GLOCK_BUG_ON(gl, 1);
230 gfs2_glock_schedule_for_reclaim(gl);
231} 205}
232 206
233/** 207/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
236 * 210 *
237 */ 211 */
238 212
239int gfs2_glock_put(struct gfs2_glock *gl) 213void gfs2_glock_put(struct gfs2_glock *gl)
240{ 214{
241 int rv = 0; 215 struct gfs2_sbd *sdp = gl->gl_sbd;
216 struct address_space *mapping = gfs2_glock2aspace(gl);
242 217
243 write_lock(gl_lock_addr(gl->gl_hash)); 218 if (atomic_dec_and_test(&gl->gl_ref)) {
244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { 219 spin_lock_bucket(gl->gl_hash);
245 hlist_del(&gl->gl_list); 220 hlist_bl_del_rcu(&gl->gl_list);
221 spin_unlock_bucket(gl->gl_hash);
222 spin_lock(&lru_lock);
246 if (!list_empty(&gl->gl_lru)) { 223 if (!list_empty(&gl->gl_lru)) {
247 list_del_init(&gl->gl_lru); 224 list_del_init(&gl->gl_lru);
248 atomic_dec(&lru_count); 225 atomic_dec(&lru_count);
249 } 226 }
250 spin_unlock(&lru_lock); 227 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 228 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
253 glock_free(gl); 229 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
254 rv = 1; 230 trace_gfs2_glock_put(gl);
255 goto out; 231 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
256 } 232 }
257 spin_lock(&gl->gl_spin);
258 gfs2_glock_schedule_for_reclaim(gl);
259 spin_unlock(&gl->gl_spin);
260 write_unlock(gl_lock_addr(gl->gl_hash));
261out:
262 return rv;
263} 233}
264 234
265/** 235/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
275 const struct lm_lockname *name) 245 const struct lm_lockname *name)
276{ 246{
277 struct gfs2_glock *gl; 247 struct gfs2_glock *gl;
278 struct hlist_node *h; 248 struct hlist_bl_node *h;
279 249
280 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { 250 hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
281 if (!lm_name_equal(&gl->gl_name, name)) 251 if (!lm_name_equal(&gl->gl_name, name))
282 continue; 252 continue;
283 if (gl->gl_sbd != sdp) 253 if (gl->gl_sbd != sdp)
284 continue; 254 continue;
285 255 if (atomic_inc_not_zero(&gl->gl_ref))
286 atomic_inc(&gl->gl_ref); 256 return gl;
287
288 return gl;
289 } 257 }
290 258
291 return NULL; 259 return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 struct gfs2_glock *gl, *tmp; 711 struct gfs2_glock *gl, *tmp;
744 unsigned int hash = gl_hash(sdp, &name); 712 unsigned int hash = gl_hash(sdp, &name);
745 struct address_space *mapping; 713 struct address_space *mapping;
714 struct kmem_cache *cachep;
746 715
747 read_lock(gl_lock_addr(hash)); 716 rcu_read_lock();
748 gl = search_bucket(hash, sdp, &name); 717 gl = search_bucket(hash, sdp, &name);
749 read_unlock(gl_lock_addr(hash)); 718 rcu_read_unlock();
750 719
751 *glp = gl; 720 *glp = gl;
752 if (gl) 721 if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
755 return -ENOENT; 724 return -ENOENT;
756 725
757 if (glops->go_flags & GLOF_ASPACE) 726 if (glops->go_flags & GLOF_ASPACE)
758 gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); 727 cachep = gfs2_glock_aspace_cachep;
759 else 728 else
760 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 729 cachep = gfs2_glock_cachep;
730 gl = kmem_cache_alloc(cachep, GFP_KERNEL);
761 if (!gl) 731 if (!gl)
762 return -ENOMEM; 732 return -ENOMEM;
763 733
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
790 mapping->writeback_index = 0; 760 mapping->writeback_index = 0;
791 } 761 }
792 762
793 write_lock(gl_lock_addr(hash)); 763 spin_lock_bucket(hash);
794 tmp = search_bucket(hash, sdp, &name); 764 tmp = search_bucket(hash, sdp, &name);
795 if (tmp) { 765 if (tmp) {
796 write_unlock(gl_lock_addr(hash)); 766 spin_unlock_bucket(hash);
797 glock_free(gl); 767 kmem_cache_free(cachep, gl);
768 atomic_dec(&sdp->sd_glock_disposal);
798 gl = tmp; 769 gl = tmp;
799 } else { 770 } else {
800 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); 771 hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
801 write_unlock(gl_lock_addr(hash)); 772 spin_unlock_bucket(hash);
802 } 773 }
803 774
804 *glp = gl; 775 *glp = gl;
@@ -1007,13 +978,13 @@ fail:
1007 insert_pt = &gh2->gh_list; 978 insert_pt = &gh2->gh_list;
1008 } 979 }
1009 set_bit(GLF_QUEUED, &gl->gl_flags); 980 set_bit(GLF_QUEUED, &gl->gl_flags);
981 trace_gfs2_glock_queue(gh, 1);
1010 if (likely(insert_pt == NULL)) { 982 if (likely(insert_pt == NULL)) {
1011 list_add_tail(&gh->gh_list, &gl->gl_holders); 983 list_add_tail(&gh->gh_list, &gl->gl_holders);
1012 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 984 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1013 goto do_cancel; 985 goto do_cancel;
1014 return; 986 return;
1015 } 987 }
1016 trace_gfs2_glock_queue(gh, 1);
1017 list_add_tail(&gh->gh_list, insert_pt); 988 list_add_tail(&gh->gh_list, insert_pt);
1018do_cancel: 989do_cancel:
1019 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 990 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1113 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1084 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1114 fast_path = 1; 1085 fast_path = 1;
1115 } 1086 }
1087 __gfs2_glock_schedule_for_reclaim(gl);
1116 trace_gfs2_glock_queue(gh, 0); 1088 trace_gfs2_glock_queue(gh, 0);
1117 spin_unlock(&gl->gl_spin); 1089 spin_unlock(&gl->gl_spin);
1118 if (likely(fast_path)) 1090 if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1276 1248
1277void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1249void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1278{ 1250{
1279 unsigned int x; 1251 while (num_gh--)
1280 1252 gfs2_glock_dq(&ghs[num_gh]);
1281 for (x = 0; x < num_gh; x++)
1282 gfs2_glock_dq(&ghs[x]);
1283} 1253}
1284 1254
1285/** 1255/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1291 1261
1292void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) 1262void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1293{ 1263{
1294 unsigned int x; 1264 while (num_gh--)
1295 1265 gfs2_glock_dq_uninit(&ghs[num_gh]);
1296 for (x = 0; x < num_gh; x++)
1297 gfs2_glock_dq_uninit(&ghs[x]);
1298} 1266}
1299 1267
1300void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1268void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
1440 * @sdp: the filesystem 1408 * @sdp: the filesystem
1441 * @bucket: the bucket 1409 * @bucket: the bucket
1442 * 1410 *
1443 * Returns: 1 if the bucket has entries
1444 */ 1411 */
1445 1412
1446static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, 1413static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
1447 unsigned int hash) 1414 unsigned int hash)
1448{ 1415{
1449 struct gfs2_glock *gl, *prev = NULL; 1416 struct gfs2_glock *gl;
1450 int has_entries = 0; 1417 struct hlist_bl_head *head = &gl_hash_table[hash];
1451 struct hlist_head *head = &gl_hash_table[hash].hb_list; 1418 struct hlist_bl_node *pos;
1452 1419
1453 read_lock(gl_lock_addr(hash)); 1420 rcu_read_lock();
1454 /* Can't use hlist_for_each_entry - don't want prefetch here */ 1421 hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
1455 if (hlist_empty(head)) 1422 if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
1456 goto out;
1457 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1458 while(1) {
1459 if (!sdp || gl->gl_sbd == sdp) {
1460 gfs2_glock_hold(gl);
1461 read_unlock(gl_lock_addr(hash));
1462 if (prev)
1463 gfs2_glock_put(prev);
1464 prev = gl;
1465 examiner(gl); 1423 examiner(gl);
1466 has_entries = 1;
1467 read_lock(gl_lock_addr(hash));
1468 }
1469 if (gl->gl_list.next == NULL)
1470 break;
1471 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1472 } 1424 }
1473out: 1425 rcu_read_unlock();
1474 read_unlock(gl_lock_addr(hash));
1475 if (prev)
1476 gfs2_glock_put(prev);
1477 cond_resched(); 1426 cond_resched();
1478 return has_entries; 1427}
1428
1429static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(examiner, sdp, x);
1479} 1435}
1480 1436
1481 1437
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
1529 1485
1530void gfs2_glock_thaw(struct gfs2_sbd *sdp) 1486void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1531{ 1487{
1532 unsigned x; 1488 glock_hash_walk(thaw_glock, sdp);
1489}
1533 1490
1534 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1491static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1535 examine_bucket(thaw_glock, sdp, x); 1492{
1493 int ret;
1494 spin_lock(&gl->gl_spin);
1495 ret = __dump_glock(seq, gl);
1496 spin_unlock(&gl->gl_spin);
1497 return ret;
1498}
1499
1500static void dump_glock_func(struct gfs2_glock *gl)
1501{
1502 dump_glock(NULL, gl);
1536} 1503}
1537 1504
1538/** 1505/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1545 1512
1546void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1513void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1547{ 1514{
1548 unsigned int x; 1515 glock_hash_walk(clear_glock, sdp);
1549
1550 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1551 examine_bucket(clear_glock, sdp, x);
1552 flush_workqueue(glock_workqueue); 1516 flush_workqueue(glock_workqueue);
1553 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1517 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1554 gfs2_dump_lockstate(sdp); 1518 glock_hash_walk(dump_glock_func, sdp);
1555} 1519}
1556 1520
1557void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1521void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1681,15 @@ out:
1717 return error; 1681 return error;
1718} 1682}
1719 1683
1720static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1721{
1722 int ret;
1723 spin_lock(&gl->gl_spin);
1724 ret = __dump_glock(seq, gl);
1725 spin_unlock(&gl->gl_spin);
1726 return ret;
1727}
1728 1684
1729/**
1730 * gfs2_dump_lockstate - print out the current lockstate
1731 * @sdp: the filesystem
1732 * @ub: the buffer to copy the information into
1733 *
1734 * If @ub is NULL, dump the lockstate to the console.
1735 *
1736 */
1737
1738static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1739{
1740 struct gfs2_glock *gl;
1741 struct hlist_node *h;
1742 unsigned int x;
1743 int error = 0;
1744
1745 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
1746
1747 read_lock(gl_lock_addr(x));
1748
1749 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
1750 if (gl->gl_sbd != sdp)
1751 continue;
1752
1753 error = dump_glock(NULL, gl);
1754 if (error)
1755 break;
1756 }
1757
1758 read_unlock(gl_lock_addr(x));
1759
1760 if (error)
1761 break;
1762 }
1763
1764
1765 return error;
1766}
1767 1685
1768 1686
1769int __init gfs2_glock_init(void) 1687int __init gfs2_glock_init(void)
1770{ 1688{
1771 unsigned i; 1689 unsigned i;
1772 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { 1690 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
1773 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); 1691 INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
1774 }
1775#ifdef GL_HASH_LOCK_SZ
1776 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
1777 rwlock_init(&gl_hash_locks[i]);
1778 } 1692 }
1779#endif
1780 1693
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1694 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZABLE, 0); 1695 WQ_HIGHPRI | WQ_FREEZABLE, 0);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
1802 destroy_workqueue(gfs2_delete_workqueue); 1715 destroy_workqueue(gfs2_delete_workqueue);
1803} 1716}
1804 1717
1718static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
1719{
1720 return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
1721 struct gfs2_glock, gl_list);
1722}
1723
1724static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
1725{
1726 return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
1727 struct gfs2_glock, gl_list);
1728}
1729
1805static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1730static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1806{ 1731{
1807 struct gfs2_glock *gl; 1732 struct gfs2_glock *gl;
1808 1733
1809restart: 1734 do {
1810 read_lock(gl_lock_addr(gi->hash)); 1735 gl = gi->gl;
1811 gl = gi->gl; 1736 if (gl) {
1812 if (gl) { 1737 gi->gl = glock_hash_next(gl);
1813 gi->gl = hlist_entry(gl->gl_list.next, 1738 } else {
1814 struct gfs2_glock, gl_list); 1739 gi->gl = glock_hash_chain(gi->hash);
1815 } else { 1740 }
1816 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, 1741 while (gi->gl == NULL) {
1817 struct gfs2_glock, gl_list); 1742 gi->hash++;
1818 } 1743 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1819 if (gi->gl) 1744 rcu_read_unlock();
1820 gfs2_glock_hold(gi->gl); 1745 return 1;
1821 read_unlock(gl_lock_addr(gi->hash)); 1746 }
1822 if (gl) 1747 gi->gl = glock_hash_chain(gi->hash);
1823 gfs2_glock_put(gl); 1748 }
1824 while (gi->gl == NULL) { 1749 /* Skip entries for other sb and dead entries */
1825 gi->hash++; 1750 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
1826 if (gi->hash >= GFS2_GL_HASH_SIZE)
1827 return 1;
1828 read_lock(gl_lock_addr(gi->hash));
1829 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
1830 struct gfs2_glock, gl_list);
1831 if (gi->gl)
1832 gfs2_glock_hold(gi->gl);
1833 read_unlock(gl_lock_addr(gi->hash));
1834 }
1835
1836 if (gi->sdp != gi->gl->gl_sbd)
1837 goto restart;
1838 1751
1839 return 0; 1752 return 0;
1840} 1753}
1841 1754
1842static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
1843{
1844 if (gi->gl)
1845 gfs2_glock_put(gi->gl);
1846 gi->gl = NULL;
1847}
1848
1849static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 1755static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1850{ 1756{
1851 struct gfs2_glock_iter *gi = seq->private; 1757 struct gfs2_glock_iter *gi = seq->private;
1852 loff_t n = *pos; 1758 loff_t n = *pos;
1853 1759
1854 gi->hash = 0; 1760 gi->hash = 0;
1761 rcu_read_lock();
1855 1762
1856 do { 1763 do {
1857 if (gfs2_glock_iter_next(gi)) { 1764 if (gfs2_glock_iter_next(gi))
1858 gfs2_glock_iter_free(gi);
1859 return NULL; 1765 return NULL;
1860 }
1861 } while (n--); 1766 } while (n--);
1862 1767
1863 return gi->gl; 1768 return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1870 1775
1871 (*pos)++; 1776 (*pos)++;
1872 1777
1873 if (gfs2_glock_iter_next(gi)) { 1778 if (gfs2_glock_iter_next(gi))
1874 gfs2_glock_iter_free(gi);
1875 return NULL; 1779 return NULL;
1876 }
1877 1780
1878 return gi->gl; 1781 return gi->gl;
1879} 1782}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1881static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 1784static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1882{ 1785{
1883 struct gfs2_glock_iter *gi = seq->private; 1786 struct gfs2_glock_iter *gi = seq->private;
1884 gfs2_glock_iter_free(gi); 1787
1788 if (gi->gl)
1789 rcu_read_unlock();
1790 gi->gl = NULL;
1885} 1791}
1886 1792
1887static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1793static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb61..aea160690e9 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
119 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
120 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct gfs2_glock *gl);
122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
123 unsigned int flags); 123 unsigned int flags);
124 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
174 int create, struct gfs2_glock **glp); 174 int create, struct gfs2_glock **glp);
175void gfs2_glock_hold(struct gfs2_glock *gl); 175void gfs2_glock_hold(struct gfs2_glock *gl);
176void gfs2_glock_put_nolock(struct gfs2_glock *gl); 176void gfs2_glock_put_nolock(struct gfs2_glock *gl);
177int gfs2_glock_put(struct gfs2_glock *gl); 177void gfs2_glock_put(struct gfs2_glock *gl);
178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
179 struct gfs2_holder *gh); 179 struct gfs2_holder *gh);
180void gfs2_holder_reinit(unsigned int state, unsigned flags, 180void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
223 return error; 223 return error;
224} 224}
225 225
226/* Lock Value Block functions */ 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228int gfs2_lvb_hold(struct gfs2_glock *gl); 228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229void gfs2_lvb_unhold(struct gfs2_glock *gl); 229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230 230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
233void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 233extern void gfs2_glock_free(struct gfs2_glock *gl);
234void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 234
235void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 235extern int __init gfs2_glock_init(void);
236void gfs2_glock_thaw(struct gfs2_sbd *sdp); 236extern void gfs2_glock_exit(void);
237 237
238int __init gfs2_glock_init(void); 238extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
239void gfs2_glock_exit(void); 239extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
240 240extern int gfs2_register_debugfs(void);
241int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 241extern void gfs2_unregister_debugfs(void);
242void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
243int gfs2_register_debugfs(void);
244void gfs2_unregister_debugfs(void);
245 242
246extern const struct lm_lockops gfs2_dlm_ops; 243extern const struct lm_lockops gfs2_dlm_ops;
247 244
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a5..3754e3cbf02 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
56 BUG_ON(current->journal_info); 56 BUG_ON(current->journal_info);
57 current->journal_info = &tr; 57 current->journal_info = &tr;
58 58
59 gfs2_log_lock(sdp); 59 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 60 while (!list_empty(head)) {
61 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
62 bd_ail_gl_list); 62 bd_ail_gl_list);
63 bh = bd->bd_bh; 63 bh = bd->bd_bh;
64 gfs2_remove_from_ail(bd); 64 gfs2_remove_from_ail(bd);
65 spin_unlock(&sdp->sd_ail_lock);
66
65 bd->bd_bh = NULL; 67 bd->bd_bh = NULL;
66 bh->b_private = NULL; 68 bh->b_private = NULL;
67 bd->bd_blkno = bh->b_blocknr; 69 bd->bd_blkno = bh->b_blocknr;
70 gfs2_log_lock(sdp);
68 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 71 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
69 gfs2_trans_add_revoke(sdp, bd); 72 gfs2_trans_add_revoke(sdp, bd);
73 gfs2_log_unlock(sdp);
74
75 spin_lock(&sdp->sd_ail_lock);
70 } 76 }
71 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
72 gfs2_log_unlock(sdp); 78 spin_unlock(&sdp->sd_ail_lock);
73 79
74 gfs2_trans_end(sdp); 80 gfs2_trans_end(sdp);
75 gfs2_log_flush(sdp, NULL); 81 gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
206static int inode_go_demote_ok(const struct gfs2_glock *gl) 212static int inode_go_demote_ok(const struct gfs2_glock *gl)
207{ 213{
208 struct gfs2_sbd *sdp = gl->gl_sbd; 214 struct gfs2_sbd *sdp = gl->gl_sbd;
215 struct gfs2_holder *gh;
216
209 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 217 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
210 return 0; 218 return 0;
219
220 if (!list_empty(&gl->gl_holders)) {
221 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
222 if (gh->gh_list.next != &gl->gl_holders)
223 return 0;
224 }
225
211 return 1; 226 return 1;
212} 227}
213 228
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
272} 287}
273 288
274/** 289/**
275 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
276 * @gl: the glock
277 *
278 * Returns: 1 if it's ok
279 */
280
281static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
282{
283 const struct address_space *mapping = (const struct address_space *)(gl + 1);
284 return !mapping->nrpages;
285}
286
287/**
288 * rgrp_go_lock - operation done after an rgrp lock is locked by 290 * rgrp_go_lock - operation done after an rgrp lock is locked by
289 * a first holder on this node. 291 * a first holder on this node.
290 * @gl: the glock 292 * @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
410const struct gfs2_glock_operations gfs2_rgrp_glops = { 412const struct gfs2_glock_operations gfs2_rgrp_glops = {
411 .go_xmote_th = rgrp_go_sync, 413 .go_xmote_th = rgrp_go_sync,
412 .go_inval = rgrp_go_inval, 414 .go_inval = rgrp_go_inval,
413 .go_demote_ok = rgrp_go_demote_ok,
414 .go_lock = rgrp_go_lock, 415 .go_lock = rgrp_go_lock,
415 .go_unlock = rgrp_go_unlock, 416 .go_unlock = rgrp_go_unlock,
416 .go_dump = gfs2_rgrp_dump, 417 .go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c0627..870a89d6d4d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
15#include <linux/workqueue.h> 15#include <linux/workqueue.h>
16#include <linux/dlm.h> 16#include <linux/dlm.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/rcupdate.h>
19#include <linux/rculist_bl.h>
18 20
19#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
20#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
201}; 203};
202 204
203struct gfs2_glock { 205struct gfs2_glock {
204 struct hlist_node gl_list; 206 struct hlist_bl_node gl_list;
205 unsigned long gl_flags; /* GLF_... */ 207 unsigned long gl_flags; /* GLF_... */
206 struct lm_lockname gl_name; 208 struct lm_lockname gl_name;
207 atomic_t gl_ref; 209 atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
234 atomic_t gl_ail_count; 236 atomic_t gl_ail_count;
235 struct delayed_work gl_work; 237 struct delayed_work gl_work;
236 struct work_struct gl_delete; 238 struct work_struct gl_delete;
239 struct rcu_head gl_rcu;
237}; 240};
238 241
239#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 242#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
314 QDF_USER = 0, 317 QDF_USER = 0,
315 QDF_CHANGE = 1, 318 QDF_CHANGE = 1,
316 QDF_LOCKED = 2, 319 QDF_LOCKED = 2,
320 QDF_REFRESH = 3,
317}; 321};
318 322
319struct gfs2_quota_data { 323struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
647 unsigned int sd_log_flush_head; 651 unsigned int sd_log_flush_head;
648 u64 sd_log_flush_wrapped; 652 u64 sd_log_flush_wrapped;
649 653
654 spinlock_t sd_ail_lock;
650 struct list_head sd_ail1_list; 655 struct list_head sd_ail1_list;
651 struct list_head sd_ail2_list; 656 struct list_head sd_ail2_list;
652 u64 sd_ail_sync_gen; 657 u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f..98c80d8c2a6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
22{ 22{
23 struct gfs2_glock *gl = arg; 23 struct gfs2_glock *gl = arg;
24 unsigned ret = gl->gl_state; 24 unsigned ret = gl->gl_state;
25 struct gfs2_sbd *sdp = gl->gl_sbd;
26 25
27 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 26 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
28 27
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
31 30
32 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
33 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
34 if (gl->gl_ops->go_flags & GLOF_ASPACE) 33 gfs2_glock_free(gl);
35 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
36 else
37 kmem_cache_free(gfs2_glock_cachep, gl);
38 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
39 wake_up(&sdp->sd_glock_wait);
40 return; 34 return;
41 case -DLM_ECANCEL: /* Cancel while getting lock */ 35 case -DLM_ECANCEL: /* Cancel while getting lock */
42 ret |= LM_OUT_CANCELED; 36 ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 158 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
165} 159}
166 160
167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 161static void gdlm_put_lock(struct gfs2_glock *gl)
168{ 162{
169 struct gfs2_sbd *sdp = gl->gl_sbd; 163 struct gfs2_sbd *sdp = gl->gl_sbd;
170 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 164 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
171 int error; 165 int error;
172 166
173 if (gl->gl_lksb.sb_lkid == 0) { 167 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl); 168 gfs2_glock_free(gl);
175 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
176 wake_up(&sdp->sd_glock_wait);
177 return; 169 return;
178 } 170 }
179 171
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e1..e7ed31f858d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
67 * @mapping: The associated mapping (maybe NULL) 67 * @mapping: The associated mapping (maybe NULL)
68 * @bd: The gfs2_bufdata to remove 68 * @bd: The gfs2_bufdata to remove
69 * 69 *
70 * The log lock _must_ be held when calling this function 70 * The ail lock _must_ be held when calling this function
71 * 71 *
72 */ 72 */
73 73
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
88 */ 88 */
89 89
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
91__releases(&sdp->sd_log_lock) 91__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_log_lock) 92__acquires(&sdp->sd_ail_lock)
93{ 93{
94 struct gfs2_bufdata *bd, *s; 94 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 95 struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 118
119 get_bh(bh); 119 get_bh(bh);
120 gfs2_log_unlock(sdp); 120 spin_unlock(&sdp->sd_ail_lock);
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
128 } 128 }
129 gfs2_log_lock(sdp); 129 spin_lock(&sdp->sd_ail_lock);
130 130
131 retry = 1; 131 retry = 1;
132 break; 132 break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
175 struct gfs2_ail *ai; 175 struct gfs2_ail *ai;
176 int done = 0; 176 int done = 0;
177 177
178 gfs2_log_lock(sdp); 178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list; 179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) { 180 if (list_empty(head)) {
181 gfs2_log_unlock(sdp); 181 spin_unlock(&sdp->sd_ail_lock);
182 return; 182 return;
183 } 183 }
184 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
189 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
190 continue; 190 continue;
191 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ 192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0; 193 done = 0;
194 break; 194 break;
195 } 195 }
196 } 196 }
197 197
198 gfs2_log_unlock(sdp); 198 spin_unlock(&sdp->sd_ail_lock);
199} 199}
200 200
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
203 struct gfs2_ail *ai, *s; 203 struct gfs2_ail *ai, *s;
204 int ret; 204 int ret;
205 205
206 gfs2_log_lock(sdp); 206 spin_lock(&sdp->sd_ail_lock);
207 207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 209 if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
214 214
215 ret = list_empty(&sdp->sd_ail1_list); 215 ret = list_empty(&sdp->sd_ail1_list);
216 216
217 gfs2_log_unlock(sdp); 217 spin_unlock(&sdp->sd_ail_lock);
218 218
219 return ret; 219 return ret;
220} 220}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
247 int wrap = (new_tail < old_tail); 247 int wrap = (new_tail < old_tail);
248 int a, b, rm; 248 int a, b, rm;
249 249
250 gfs2_log_lock(sdp); 250 spin_lock(&sdp->sd_ail_lock);
251 251
252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { 252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
253 a = (old_tail <= ai->ai_first); 253 a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
263 kfree(ai); 263 kfree(ai);
264 } 264 }
265 265
266 gfs2_log_unlock(sdp); 266 spin_unlock(&sdp->sd_ail_lock);
267} 267}
268 268
269/** 269/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
421 struct gfs2_ail *ai; 421 struct gfs2_ail *ai;
422 unsigned int tail; 422 unsigned int tail;
423 423
424 gfs2_log_lock(sdp); 424 spin_lock(&sdp->sd_ail_lock);
425 425
426 if (list_empty(&sdp->sd_ail1_list)) { 426 if (list_empty(&sdp->sd_ail1_list)) {
427 tail = sdp->sd_log_head; 427 tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
430 tail = ai->ai_first; 430 tail = ai->ai_first;
431 } 431 }
432 432
433 gfs2_log_unlock(sdp); 433 spin_unlock(&sdp->sd_ail_lock);
434 434
435 return tail; 435 return tail;
436} 436}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
743 sdp->sd_log_commited_databuf = 0; 743 sdp->sd_log_commited_databuf = 0;
744 sdp->sd_log_commited_revoke = 0; 744 sdp->sd_log_commited_revoke = 0;
745 745
746 spin_lock(&sdp->sd_ail_lock);
746 if (!list_empty(&ai->ai_ail1_list)) { 747 if (!list_empty(&ai->ai_ail1_list)) {
747 list_add(&ai->ai_list, &sdp->sd_ail1_list); 748 list_add(&ai->ai_list, &sdp->sd_ail1_list);
748 ai = NULL; 749 ai = NULL;
749 } 750 }
751 spin_unlock(&sdp->sd_ail_lock);
750 gfs2_log_unlock(sdp); 752 gfs2_log_unlock(sdp);
751 trace_gfs2_log_flush(sdp, 0); 753 trace_gfs2_log_flush(sdp, 0);
752 up_write(&sdp->sd_log_flush_lock); 754 up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058..e919abf25ec 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
51 /* If this buffer is in the AIL and it has already been written 51 /* If this buffer is in the AIL and it has already been written
52 * to in-place disk block, remove it from the AIL. 52 * to in-place disk block, remove it from the AIL.
53 */ 53 */
54 spin_lock(&sdp->sd_ail_lock);
54 if (bd->bd_ail) 55 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 56 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
57 spin_unlock(&sdp->sd_ail_lock);
56 get_bh(bh); 58 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned); 59 atomic_inc(&sdp->sd_log_pinned);
58 trace_gfs2_pin(bd, 1); 60 trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
80 mark_buffer_dirty(bh); 82 mark_buffer_dirty(bh);
81 clear_buffer_pinned(bh); 83 clear_buffer_pinned(bh);
82 84
83 gfs2_log_lock(sdp); 85 spin_lock(&sdp->sd_ail_lock);
84 if (bd->bd_ail) { 86 if (bd->bd_ail) {
85 list_del(&bd->bd_ail_st_list); 87 list_del(&bd->bd_ail_st_list);
86 brelse(bh); 88 brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
91 } 93 }
92 bd->bd_ail = ai; 94 bd->bd_ail = ai;
93 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
94 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 96 spin_unlock(&sdp->sd_ail_lock);
97
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
95 trace_gfs2_pin(bd, 0); 100 trace_gfs2_pin(bd, 0);
96 gfs2_log_unlock(sdp);
97 unlock_buffer(bh); 101 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned); 102 atomic_dec(&sdp->sd_log_pinned);
99} 103}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 72c31a315d9..888a5f5a1a5 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h>
17#include <asm/atomic.h> 19#include <asm/atomic.h>
18 20
19#include "gfs2.h" 21#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
45{ 47{
46 struct gfs2_glock *gl = foo; 48 struct gfs2_glock *gl = foo;
47 49
48 INIT_HLIST_NODE(&gl->gl_list); 50 INIT_HLIST_BL_NODE(&gl->gl_list);
49 spin_lock_init(&gl->gl_spin); 51 spin_lock_init(&gl->gl_spin);
50 INIT_LIST_HEAD(&gl->gl_holders); 52 INIT_LIST_HEAD(&gl->gl_holders);
51 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
@@ -191,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
191 unregister_filesystem(&gfs2meta_fs_type); 193 unregister_filesystem(&gfs2meta_fs_type);
192 destroy_workqueue(gfs_recovery_wq); 194 destroy_workqueue(gfs_recovery_wq);
193 195
196 rcu_barrier();
197
194 kmem_cache_destroy(gfs2_quotad_cachep); 198 kmem_cache_destroy(gfs2_quotad_cachep);
195 kmem_cache_destroy(gfs2_rgrpd_cachep); 199 kmem_cache_destroy(gfs2_rgrpd_cachep);
196 kmem_cache_destroy(gfs2_bufdata_cachep); 200 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f..01d97f48655 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
326 brelse(bh); 326 brelse(bh);
327 } 327 }
328 if (bd) { 328 if (bd) {
329 spin_lock(&sdp->sd_ail_lock);
329 if (bd->bd_ail) { 330 if (bd->bd_ail) {
330 gfs2_remove_from_ail(bd); 331 gfs2_remove_from_ail(bd);
331 bh->b_private = NULL; 332 bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
333 bd->bd_blkno = bh->b_blocknr; 334 bd->bd_blkno = bh->b_blocknr;
334 gfs2_trans_add_revoke(sdp, bd); 335 gfs2_trans_add_revoke(sdp, bd);
335 } 336 }
337 spin_unlock(&sdp->sd_ail_lock);
336 } 338 }
337 clear_buffer_dirty(bh); 339 clear_buffer_dirty(bh);
338 clear_buffer_uptodate(bh); 340 clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f7..42ef24355af 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
99 99
100 init_waitqueue_head(&sdp->sd_log_waitq); 100 init_waitqueue_head(&sdp->sd_log_waitq);
101 init_waitqueue_head(&sdp->sd_logd_waitq); 101 init_waitqueue_head(&sdp->sd_logd_waitq);
102 spin_lock_init(&sdp->sd_ail_lock);
102 INIT_LIST_HEAD(&sdp->sd_ail1_list); 103 INIT_LIST_HEAD(&sdp->sd_ail1_list);
103 INIT_LIST_HEAD(&sdp->sd_ail2_list); 104 INIT_LIST_HEAD(&sdp->sd_ail2_list);
104 105
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
928 { Opt_err, NULL }, 929 { Opt_err, NULL },
929}; 930};
930 931
931static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
932{
933 struct gfs2_sbd *sdp = gl->gl_sbd;
934 kmem_cache_free(cachep, gl);
935 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
936 wake_up(&sdp->sd_glock_wait);
937}
938
939static const struct lm_lockops nolock_ops = { 932static const struct lm_lockops nolock_ops = {
940 .lm_proto_name = "lock_nolock", 933 .lm_proto_name = "lock_nolock",
941 .lm_put_lock = nolock_put_lock, 934 .lm_put_lock = gfs2_glock_free,
942 .lm_tokens = &nolock_tokens, 935 .lm_tokens = &nolock_tokens,
943}; 936};
944 937
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20..09e436a5072 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1026 1026
1027/** 1027/**
1028 * gfs2_permission - 1028 * gfs2_permission -
1029 * @inode: 1029 * @inode: The inode
1030 * @mask: 1030 * @mask: The mask to be tested
1031 * @nd: passed from Linux VFS, ignored by us 1031 * @flags: Indicates whether this is an RCU path walk or not
1032 * 1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the 1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only 1034 * inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1044 int error; 1044 int error;
1045 int unlock = 0; 1045 int unlock = 0;
1046 1046
1047 if (flags & IPERM_FLAG_RCU)
1048 return -ECHILD;
1049 1047
1050 ip = GFS2_I(inode); 1048 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1053 if (error)
1054 return error; 1054 return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963d..e23d9864c41 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
834 goto out_end_trans; 834 goto out_end_trans;
835 835
836 do_qc(qd, -qd->qd_change_sync); 836 do_qc(qd, -qd->qd_change_sync);
837 set_bit(QDF_REFRESH, &qd->qd_flags);
837 } 838 }
838 839
839 error = 0; 840 error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
929{ 930{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 931 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_alloc *al = ip->i_alloc; 932 struct gfs2_alloc *al = ip->i_alloc;
933 struct gfs2_quota_data *qd;
932 unsigned int x; 934 unsigned int x;
933 int error = 0; 935 int error = 0;
934 936
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
942 sort_qd, NULL); 944 sort_qd, NULL);
943 945
944 for (x = 0; x < al->al_qd_num; x++) { 946 for (x = 0; x < al->al_qd_num; x++) {
945 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); 947 int force = NO_FORCE;
948 qd = al->al_qd[x];
949 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
950 force = FORCE;
951 error = do_glock(qd, force, &al->al_qd_ghs[x]);
946 if (error) 952 if (error)
947 break; 953 break;
948 } 954 }
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1587 1593
1588 offset = qd2offset(qd); 1594 offset = qd2offset(qd);
1589 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); 1595 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
1596 if (gfs2_is_stuffed(ip))
1597 alloc_required = 1;
1590 if (alloc_required) { 1598 if (alloc_required) {
1591 al = gfs2_alloc_get(ip); 1599 al = gfs2_alloc_get(ip);
1592 if (al == NULL) 1600 if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1600 blocks += gfs2_rg_blocks(al); 1608 blocks += gfs2_rg_blocks(al);
1601 } 1609 }
1602 1610
1603 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); 1611 /* Some quotas span block boundaries and can update two blocks,
1612 adding an extra block to the transaction to handle such quotas */
1613 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
1604 if (error) 1614 if (error)
1605 goto out_release; 1615 goto out_release;
1606 1616
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020..cf930cd9664 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
1602 * 1602 *
1603 */ 1603 */
1604 1604
1605void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1605void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1606{ 1606{
1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1608 struct gfs2_rgrpd *rgd; 1608 struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1618
1619 gfs2_trans_add_rg(rgd); 1619 gfs2_trans_add_rg(rgd);
1620}
1620 1621
1622/**
1623 * gfs2_free_data - free a contiguous run of data block(s)
1624 * @ip: the inode these blocks are being freed from
1625 * @bstart: first block of a run of contiguous blocks
1626 * @blen: the length of the block run
1627 *
1628 */
1629
1630void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631{
1632 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1633
1634 __gfs2_free_data(ip, bstart, blen);
1621 gfs2_statfs_change(sdp, 0, +blen, 0); 1635 gfs2_statfs_change(sdp, 0, +blen, 0);
1622 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1636 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1623} 1637}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1630 * 1644 *
1631 */ 1645 */
1632 1646
1633void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1647void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1634{ 1648{
1635 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1636 struct gfs2_rgrpd *rgd; 1650 struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1659 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1646 1660
1647 gfs2_trans_add_rg(rgd); 1661 gfs2_trans_add_rg(rgd);
1662 gfs2_meta_wipe(ip, bstart, blen);
1663}
1648 1664
1665/**
1666 * gfs2_free_meta - free a contiguous run of data block(s)
1667 * @ip: the inode these blocks are being freed from
1668 * @bstart: first block of a run of contiguous blocks
1669 * @blen: the length of the block run
1670 *
1671 */
1672
1673void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1674{
1675 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1676
1677 __gfs2_free_meta(ip, bstart, blen);
1649 gfs2_statfs_change(sdp, 0, +blen, 0); 1678 gfs2_statfs_change(sdp, 0, +blen, 0);
1650 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1679 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1651 gfs2_meta_wipe(ip, bstart, blen);
1652} 1680}
1653 1681
1654void gfs2_unlink_di(struct inode *inode) 1682void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369..a80e3034ac4 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
55extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
58extern void gfs2_unlink_di(struct inode *inode); 60extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/inode.c b/fs/inode.c
index 0647d80accf..9910c039f02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
84DEFINE_SPINLOCK(inode_lock); 84DEFINE_SPINLOCK(inode_lock);
85 85
86/* 86/*
87 * iprune_sem provides exclusion between the kswapd or try_to_free_pages 87 * iprune_sem provides exclusion between the icache shrinking and the
88 * icache shrinking path, and the umount path. Without this exclusion, 88 * umount path.
89 * by the time prune_icache calls iput for the inode whose pages it has
90 * been invalidating, or by the time it calls clear_inode & destroy_inode
91 * from its final dispose_list, the struct super_block they refer to
92 * (for inode->i_sb->s_op) may already have been freed and reused.
93 * 89 *
94 * We make this an rwsem because the fastpath is icache shrinking. In 90 * We don't actually need it to protect anything in the umount path,
95 * some cases a filesystem may be doing a significant amount of work in 91 * but only need to cycle through it to make sure any inode that
96 * its inode reclaim code, so this should improve parallelism. 92 * prune_icache took off the LRU list has been fully torn down by the
93 * time we are past evict_inodes.
97 */ 94 */
98static DECLARE_RWSEM(iprune_sem); 95static DECLARE_RWSEM(iprune_sem);
99 96
@@ -516,17 +513,12 @@ void evict_inodes(struct super_block *sb)
516 struct inode *inode, *next; 513 struct inode *inode, *next;
517 LIST_HEAD(dispose); 514 LIST_HEAD(dispose);
518 515
519 down_write(&iprune_sem);
520
521 spin_lock(&inode_lock); 516 spin_lock(&inode_lock);
522 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 517 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
523 if (atomic_read(&inode->i_count)) 518 if (atomic_read(&inode->i_count))
524 continue; 519 continue;
525 520 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
526 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
527 WARN_ON(1);
528 continue; 521 continue;
529 }
530 522
531 inode->i_state |= I_FREEING; 523 inode->i_state |= I_FREEING;
532 524
@@ -542,6 +534,13 @@ void evict_inodes(struct super_block *sb)
542 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
543 535
544 dispose_list(&dispose); 536 dispose_list(&dispose);
537
538 /*
539 * Cycle through iprune_sem to make sure any inode that prune_icache
540 * moved off the list before we took the lock has been fully torn
541 * down.
542 */
543 down_write(&iprune_sem);
545 up_write(&iprune_sem); 544 up_write(&iprune_sem);
546} 545}
547 546
@@ -561,8 +560,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
561 struct inode *inode, *next; 560 struct inode *inode, *next;
562 LIST_HEAD(dispose); 561 LIST_HEAD(dispose);
563 562
564 down_write(&iprune_sem);
565
566 spin_lock(&inode_lock); 563 spin_lock(&inode_lock);
567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 564 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 565 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
@@ -590,7 +587,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
590 spin_unlock(&inode_lock); 587 spin_unlock(&inode_lock);
591 588
592 dispose_list(&dispose); 589 dispose_list(&dispose);
593 up_write(&iprune_sem);
594 590
595 return busy; 591 return busy;
596} 592}
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7f..f3d15de44b1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb3847..dd4687ff30d 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index c61600ece4f..eaaf2b511e8 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
diff --git a/fs/namei.c b/fs/namei.c
index a4689eb2df2..b912b7abe74 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (link->mnt == nd->path.mnt)
757 mntget(link->mnt);
758
759 if (unlikely(current->total_link_count >= 40)) {
760 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
761 path_put(&nd->path);
762 return -ELOOP;
763 }
764 cond_resched();
765 current->total_link_count++;
766
768 touch_atime(link->mnt, dentry); 767 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 768 nd_set_link(nd, NULL);
770 769
771 if (link->mnt == nd->path.mnt) 770 error = security_inode_follow_link(link->dentry, nd);
772 mntget(link->mnt); 771 if (error) {
772 *p = ERR_PTR(error); /* no ->put_link(), please */
773 path_put(&nd->path);
774 return error;
775 }
773 776
774 nd->last_type = LAST_BIND; 777 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 778 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,56 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 783 if (s)
781 error = __vfs_follow_link(nd, s); 784 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 785 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 786 nd->flags |= LOOKUP_JUMPED;
784 if (error) 787 nd->inode = nd->path.dentry->d_inode;
788 if (nd->inode->i_op->follow_link) {
789 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 790 path_put(&nd->path);
791 error = -ELOOP;
792 }
786 } 793 }
787 } 794 }
788 return error; 795 return error;
789} 796}
790 797
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 798static int follow_up_rcu(struct path *path)
834{ 799{
835 struct vfsmount *parent; 800 struct vfsmount *parent;
@@ -1068,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1033
1069 seq = read_seqcount_begin(&parent->d_seq); 1034 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1035 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1036 goto failed;
1072 inode = parent->d_inode; 1037 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1038 nd->path.dentry = parent;
1074 nd->seq = seq; 1039 nd->seq = seq;
@@ -1081,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1046 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1047 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1048 nd->inode = inode;
1084
1085 return 0; 1049 return 0;
1050
1051failed:
1052 nd->flags &= ~LOOKUP_RCU;
1053 if (!(nd->flags & LOOKUP_ROOT))
1054 nd->root.mnt = NULL;
1055 rcu_read_unlock();
1056 br_read_unlock(vfsmount_lock);
1057 return -ECHILD;
1086} 1058}
1087 1059
1088/* 1060/*
@@ -1216,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1188{
1217 struct vfsmount *mnt = nd->path.mnt; 1189 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1190 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1191 int need_reval = 1;
1192 int status = 1;
1220 int err; 1193 int err;
1221 1194
1222 /* 1195 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1196 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1197 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1198 * do the non-racy lookup, below.
1236 */ 1199 */
1237 if (nd->flags & LOOKUP_RCU) { 1200 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1201 unsigned seq;
1239
1240 *inode = nd->inode; 1202 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1203 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1204 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1205 goto unlazy;
1244 return -ECHILD; 1206
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1207 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1208 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1209 return -ECHILD;
1250
1251 nd->seq = seq; 1210 nd->seq = seq;
1211
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1212 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1213 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1214 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1215 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1216 need_reval = 0;
1257 goto fail; 1217 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1218 }
1259 goto done;
1260 } 1219 }
1261 path->mnt = mnt; 1220 path->mnt = mnt;
1262 path->dentry = dentry; 1221 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1222 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1223 return 0;
1265 if (nameidata_drop_rcu(nd)) 1224unlazy:
1266 return -ECHILD; 1225 if (dentry) {
1267 /* fallthru */ 1226 if (nameidata_dentry_drop_rcu(nd, dentry))
1227 return -ECHILD;
1228 } else {
1229 if (nameidata_drop_rcu(nd))
1230 return -ECHILD;
1231 }
1232 } else {
1233 dentry = __d_lookup(parent, name);
1268 } 1234 }
1269 dentry = __d_lookup(parent, name); 1235
1270 if (!dentry) 1236retry:
1271 goto need_lookup; 1237 if (unlikely(!dentry)) {
1272found: 1238 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1239 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1240
1275 if (!dentry) 1241 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1242 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1243 if (likely(!dentry)) {
1278 goto fail; 1244 dentry = d_alloc_and_lookup(parent, name, nd);
1245 if (IS_ERR(dentry)) {
1246 mutex_unlock(&dir->i_mutex);
1247 return PTR_ERR(dentry);
1248 }
1249 /* known good */
1250 need_reval = 0;
1251 status = 1;
1252 }
1253 mutex_unlock(&dir->i_mutex);
1279 } 1254 }
1280done: 1255 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1256 status = d_revalidate(dentry, nd);
1257 if (unlikely(status <= 0)) {
1258 if (status < 0) {
1259 dput(dentry);
1260 return status;
1261 }
1262 if (!d_invalidate(dentry)) {
1263 dput(dentry);
1264 dentry = NULL;
1265 need_reval = 1;
1266 goto retry;
1267 }
1268 }
1269
1281 path->mnt = mnt; 1270 path->mnt = mnt;
1282 path->dentry = dentry; 1271 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1272 err = follow_managed(path, nd->flags);
@@ -1287,39 +1276,113 @@ done:
1287 } 1276 }
1288 *inode = path->dentry->d_inode; 1277 *inode = path->dentry->d_inode;
1289 return 0; 1278 return 0;
1279}
1280
1281static inline int may_lookup(struct nameidata *nd)
1282{
1283 if (nd->flags & LOOKUP_RCU) {
1284 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1285 if (err != -ECHILD)
1286 return err;
1287 if (nameidata_drop_rcu(nd))
1288 return -ECHILD;
1289 }
1290 return exec_permission(nd->inode, 0);
1291}
1290 1292
1291need_lookup: 1293static inline int handle_dots(struct nameidata *nd, int type)
1292 dir = parent->d_inode; 1294{
1293 BUG_ON(nd->inode != dir); 1295 if (type == LAST_DOTDOT) {
1296 if (nd->flags & LOOKUP_RCU) {
1297 if (follow_dotdot_rcu(nd))
1298 return -ECHILD;
1299 } else
1300 follow_dotdot(nd);
1301 }
1302 return 0;
1303}
1294 1304
1295 mutex_lock(&dir->i_mutex); 1305static void terminate_walk(struct nameidata *nd)
1296 /* 1306{
1297 * First re-do the cached lookup just in case it was created 1307 if (!(nd->flags & LOOKUP_RCU)) {
1298 * while we waited for the directory semaphore, or the first 1308 path_put(&nd->path);
1299 * lookup failed due to an unrelated rename. 1309 } else {
1300 * 1310 nd->flags &= ~LOOKUP_RCU;
1301 * This could use version numbering or similar to avoid unnecessary 1311 if (!(nd->flags & LOOKUP_ROOT))
1302 * cache lookups, but then we'd have to do the first lookup in the 1312 nd->root.mnt = NULL;
1303 * non-racy way. However in the common case here, everything should 1313 rcu_read_unlock();
1304 * be hot in cache, so would it be a big win? 1314 br_read_unlock(vfsmount_lock);
1305 */
1306 dentry = d_lookup(parent, name);
1307 if (likely(!dentry)) {
1308 dentry = d_alloc_and_lookup(parent, name, nd);
1309 mutex_unlock(&dir->i_mutex);
1310 if (IS_ERR(dentry))
1311 goto fail;
1312 goto done;
1313 } 1315 }
1316}
1317
1318static inline int walk_component(struct nameidata *nd, struct path *path,
1319 struct qstr *name, int type, int follow)
1320{
1321 struct inode *inode;
1322 int err;
1314 /* 1323 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1324 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1325 * to be able to know about the current root directory and
1326 * parent relationships.
1317 */ 1327 */
1318 mutex_unlock(&dir->i_mutex); 1328 if (unlikely(type != LAST_NORM))
1319 goto found; 1329 return handle_dots(nd, type);
1330 err = do_lookup(nd, name, path, &inode);
1331 if (unlikely(err)) {
1332 terminate_walk(nd);
1333 return err;
1334 }
1335 if (!inode) {
1336 path_to_nameidata(path, nd);
1337 terminate_walk(nd);
1338 return -ENOENT;
1339 }
1340 if (unlikely(inode->i_op->follow_link) && follow) {
1341 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1342 return -ECHILD;
1343 BUG_ON(inode != path->dentry->d_inode);
1344 return 1;
1345 }
1346 path_to_nameidata(path, nd);
1347 nd->inode = inode;
1348 return 0;
1349}
1320 1350
1321fail: 1351/*
1322 return PTR_ERR(dentry); 1352 * This limits recursive symlink follows to 8, while
1353 * limiting consecutive symlinks to 40.
1354 *
1355 * Without that kind of total limit, nasty chains of consecutive
1356 * symlinks can cause almost arbitrarily long lookups.
1357 */
1358static inline int nested_symlink(struct path *path, struct nameidata *nd)
1359{
1360 int res;
1361
1362 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1363 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1364 path_put_conditional(path, nd);
1365 path_put(&nd->path);
1366 return -ELOOP;
1367 }
1368
1369 nd->depth++;
1370 current->link_count++;
1371
1372 do {
1373 struct path link = *path;
1374 void *cookie;
1375
1376 res = follow_link(&link, nd, &cookie);
1377 if (!res)
1378 res = walk_component(nd, path, &nd->last,
1379 nd->last_type, LOOKUP_FOLLOW);
1380 put_link(nd, &link, cookie);
1381 } while (res > 0);
1382
1383 current->link_count--;
1384 nd->depth--;
1385 return res;
1323} 1386}
1324 1387
1325/* 1388/*
@@ -1339,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1402 while (*name=='/')
1340 name++; 1403 name++;
1341 if (!*name) 1404 if (!*name)
1342 goto return_reval; 1405 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1406
1347 /* At this point we know we have a real path component. */ 1407 /* At this point we know we have a real path component. */
1348 for(;;) { 1408 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1409 unsigned long hash;
1351 struct qstr this; 1410 struct qstr this;
1352 unsigned int c; 1411 unsigned int c;
1412 int type;
1353 1413
1354 nd->flags |= LOOKUP_CONTINUE; 1414 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1415
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1416 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1417 if (err)
1367 break; 1418 break;
1368 1419
@@ -1378,52 +1429,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1429 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1430 this.hash = end_name_hash(hash);
1380 1431
1432 type = LAST_NORM;
1433 if (this.name[0] == '.') switch (this.len) {
1434 case 2:
1435 if (this.name[1] == '.') {
1436 type = LAST_DOTDOT;
1437 nd->flags |= LOOKUP_JUMPED;
1438 }
1439 break;
1440 case 1:
1441 type = LAST_DOT;
1442 }
1443 if (likely(type == LAST_NORM)) {
1444 struct dentry *parent = nd->path.dentry;
1445 nd->flags &= ~LOOKUP_JUMPED;
1446 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1447 err = parent->d_op->d_hash(parent, nd->inode,
1448 &this);
1449 if (err < 0)
1450 break;
1451 }
1452 }
1453
1381 /* remove trailing slashes? */ 1454 /* remove trailing slashes? */
1382 if (!c) 1455 if (!c)
1383 goto last_component; 1456 goto last_component;
1384 while (*++name == '/'); 1457 while (*++name == '/');
1385 if (!*name) 1458 if (!*name)
1386 goto last_with_slashes; 1459 goto last_component;
1387 1460
1388 /* 1461 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1462 if (err < 0)
1390 * to be able to know about the current root directory and 1463 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1464
1416 if (inode->i_op->follow_link) { 1465 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1466 err = nested_symlink(&next, nd);
1418 if (err) 1467 if (err)
1419 goto return_err; 1468 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1469 }
1428 err = -ENOTDIR; 1470 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1471 if (!nd->inode->i_op->lookup)
@@ -1431,210 +1473,109 @@ exec_again:
1431 continue; 1473 continue;
1432 /* here ends the main loop */ 1474 /* here ends the main loop */
1433 1475
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1476last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1477 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1478 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1479 nd->last = this;
1480 nd->last_type = LAST_NORM; 1480 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1481 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1482 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1483 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1484 return err;
1518} 1485}
1519 1486
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1487static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1488 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 nd->inode = save.dentry->d_inode;
1550 path_get(&nd->path);
1551 nd->flags |= LOOKUP_REVAL;
1552 result = link_path_walk(name, nd);
1553 }
1554
1555 path_put(&save);
1556
1557 return result;
1558}
1559
1560static void path_finish_rcu(struct nameidata *nd)
1561{
1562 if (nd->flags & LOOKUP_RCU) {
1563 /* RCU dangling. Cancel it. */
1564 nd->flags &= ~LOOKUP_RCU;
1565 nd->root.mnt = NULL;
1566 rcu_read_unlock();
1567 br_read_unlock(vfsmount_lock);
1568 }
1569 if (nd->file)
1570 fput(nd->file);
1571}
1572
1573static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1574{ 1489{
1575 int retval = 0; 1490 int retval = 0;
1576 int fput_needed; 1491 int fput_needed;
1577 struct file *file; 1492 struct file *file;
1578 1493
1579 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1494 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1580 nd->flags = flags | LOOKUP_RCU; 1495 nd->flags = flags | LOOKUP_JUMPED;
1581 nd->depth = 0; 1496 nd->depth = 0;
1497 if (flags & LOOKUP_ROOT) {
1498 struct inode *inode = nd->root.dentry->d_inode;
1499 if (*name) {
1500 if (!inode->i_op->lookup)
1501 return -ENOTDIR;
1502 retval = inode_permission(inode, MAY_EXEC);
1503 if (retval)
1504 return retval;
1505 }
1506 nd->path = nd->root;
1507 nd->inode = inode;
1508 if (flags & LOOKUP_RCU) {
1509 br_read_lock(vfsmount_lock);
1510 rcu_read_lock();
1511 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1512 } else {
1513 path_get(&nd->path);
1514 }
1515 return 0;
1516 }
1517
1582 nd->root.mnt = NULL; 1518 nd->root.mnt = NULL;
1583 nd->file = NULL;
1584 1519
1585 if (*name=='/') { 1520 if (*name=='/') {
1586 struct fs_struct *fs = current->fs; 1521 if (flags & LOOKUP_RCU) {
1587 unsigned seq; 1522 br_read_lock(vfsmount_lock);
1588 1523 rcu_read_lock();
1589 br_read_lock(vfsmount_lock); 1524 set_root_rcu(nd);
1590 rcu_read_lock(); 1525 } else {
1591 1526 set_root(nd);
1592 do { 1527 path_get(&nd->root);
1593 seq = read_seqcount_begin(&fs->seq); 1528 }
1594 nd->root = fs->root; 1529 nd->path = nd->root;
1595 nd->path = nd->root;
1596 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1597 } while (read_seqcount_retry(&fs->seq, seq));
1598
1599 } else if (dfd == AT_FDCWD) { 1530 } else if (dfd == AT_FDCWD) {
1600 struct fs_struct *fs = current->fs; 1531 if (flags & LOOKUP_RCU) {
1601 unsigned seq; 1532 struct fs_struct *fs = current->fs;
1602 1533 unsigned seq;
1603 br_read_lock(vfsmount_lock);
1604 rcu_read_lock();
1605 1534
1606 do { 1535 br_read_lock(vfsmount_lock);
1607 seq = read_seqcount_begin(&fs->seq); 1536 rcu_read_lock();
1608 nd->path = fs->pwd;
1609 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1610 } while (read_seqcount_retry(&fs->seq, seq));
1611 1537
1538 do {
1539 seq = read_seqcount_begin(&fs->seq);
1540 nd->path = fs->pwd;
1541 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1542 } while (read_seqcount_retry(&fs->seq, seq));
1543 } else {
1544 get_fs_pwd(current->fs, &nd->path);
1545 }
1612 } else { 1546 } else {
1613 struct dentry *dentry; 1547 struct dentry *dentry;
1614 1548
1615 file = fget_light(dfd, &fput_needed); 1549 file = fget_raw_light(dfd, &fput_needed);
1616 retval = -EBADF; 1550 retval = -EBADF;
1617 if (!file) 1551 if (!file)
1618 goto out_fail; 1552 goto out_fail;
1619 1553
1620 dentry = file->f_path.dentry; 1554 dentry = file->f_path.dentry;
1621 1555
1622 retval = -ENOTDIR; 1556 if (*name) {
1623 if (!S_ISDIR(dentry->d_inode->i_mode)) 1557 retval = -ENOTDIR;
1624 goto fput_fail; 1558 if (!S_ISDIR(dentry->d_inode->i_mode))
1559 goto fput_fail;
1625 1560
1626 retval = file_permission(file, MAY_EXEC); 1561 retval = file_permission(file, MAY_EXEC);
1627 if (retval) 1562 if (retval)
1628 goto fput_fail; 1563 goto fput_fail;
1564 }
1629 1565
1630 nd->path = file->f_path; 1566 nd->path = file->f_path;
1631 if (fput_needed) 1567 if (flags & LOOKUP_RCU) {
1632 nd->file = file; 1568 if (fput_needed)
1633 1569 *fp = file;
1634 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1570 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1635 br_read_lock(vfsmount_lock); 1571 br_read_lock(vfsmount_lock);
1636 rcu_read_lock(); 1572 rcu_read_lock();
1573 } else {
1574 path_get(&file->f_path);
1575 fput_light(file, fput_needed);
1576 }
1637 } 1577 }
1578
1638 nd->inode = nd->path.dentry->d_inode; 1579 nd->inode = nd->path.dentry->d_inode;
1639 return 0; 1580 return 0;
1640 1581
@@ -1644,60 +1585,23 @@ out_fail:
1644 return retval; 1585 return retval;
1645} 1586}
1646 1587
1647static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1588static inline int lookup_last(struct nameidata *nd, struct path *path)
1648{ 1589{
1649 int retval = 0; 1590 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1650 int fput_needed; 1591 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1651 struct file *file;
1652
1653 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1654 nd->flags = flags;
1655 nd->depth = 0;
1656 nd->root.mnt = NULL;
1657
1658 if (*name=='/') {
1659 set_root(nd);
1660 nd->path = nd->root;
1661 path_get(&nd->root);
1662 } else if (dfd == AT_FDCWD) {
1663 get_fs_pwd(current->fs, &nd->path);
1664 } else {
1665 struct dentry *dentry;
1666
1667 file = fget_light(dfd, &fput_needed);
1668 retval = -EBADF;
1669 if (!file)
1670 goto out_fail;
1671
1672 dentry = file->f_path.dentry;
1673
1674 retval = -ENOTDIR;
1675 if (!S_ISDIR(dentry->d_inode->i_mode))
1676 goto fput_fail;
1677
1678 retval = file_permission(file, MAY_EXEC);
1679 if (retval)
1680 goto fput_fail;
1681 1592
1682 nd->path = file->f_path; 1593 nd->flags &= ~LOOKUP_PARENT;
1683 path_get(&file->f_path); 1594 return walk_component(nd, path, &nd->last, nd->last_type,
1684 1595 nd->flags & LOOKUP_FOLLOW);
1685 fput_light(file, fput_needed);
1686 }
1687 nd->inode = nd->path.dentry->d_inode;
1688 return 0;
1689
1690fput_fail:
1691 fput_light(file, fput_needed);
1692out_fail:
1693 return retval;
1694} 1596}
1695 1597
1696/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1598/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1697static int do_path_lookup(int dfd, const char *name, 1599static int path_lookupat(int dfd, const char *name,
1698 unsigned int flags, struct nameidata *nd) 1600 unsigned int flags, struct nameidata *nd)
1699{ 1601{
1700 int retval; 1602 struct file *base = NULL;
1603 struct path path;
1604 int err;
1701 1605
1702 /* 1606 /*
1703 * Path walking is largely split up into 2 different synchronisation 1607 * Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
1713 * be handled by restarting a traditional ref-walk (which will always 1617 * be handled by restarting a traditional ref-walk (which will always
1714 * be able to complete). 1618 * be able to complete).
1715 */ 1619 */
1716 retval = path_init_rcu(dfd, name, flags, nd); 1620 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1717 if (unlikely(retval)) 1621
1718 return retval; 1622 if (unlikely(err))
1719 retval = path_walk_rcu(name, nd); 1623 return err;
1720 path_finish_rcu(nd); 1624
1721 if (nd->root.mnt) { 1625 current->total_link_count = 0;
1722 path_put(&nd->root); 1626 err = link_path_walk(name, nd);
1723 nd->root.mnt = NULL; 1627
1628 if (!err && !(flags & LOOKUP_PARENT)) {
1629 err = lookup_last(nd, &path);
1630 while (err > 0) {
1631 void *cookie;
1632 struct path link = path;
1633 nd->flags |= LOOKUP_PARENT;
1634 err = follow_link(&link, nd, &cookie);
1635 if (!err)
1636 err = lookup_last(nd, &path);
1637 put_link(nd, &link, cookie);
1638 }
1724 } 1639 }
1725 1640
1726 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1641 if (nd->flags & LOOKUP_RCU) {
1727 /* slower, locked walk */ 1642 /* went all way through without dropping RCU */
1728 if (retval == -ESTALE) 1643 BUG_ON(err);
1729 flags |= LOOKUP_REVAL; 1644 if (nameidata_drop_rcu_last(nd))
1730 retval = path_init(dfd, name, flags, nd); 1645 err = -ECHILD;
1731 if (unlikely(retval)) 1646 }
1732 return retval; 1647
1733 retval = path_walk(name, nd); 1648 if (!err)
1734 if (nd->root.mnt) { 1649 err = handle_reval_path(nd);
1735 path_put(&nd->root); 1650
1736 nd->root.mnt = NULL; 1651 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1652 if (!nd->inode->i_op->lookup) {
1653 path_put(&nd->path);
1654 return -ENOTDIR;
1737 } 1655 }
1738 } 1656 }
1739 1657
1658 if (base)
1659 fput(base);
1660
1661 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1662 path_put(&nd->root);
1663 nd->root.mnt = NULL;
1664 }
1665 return err;
1666}
1667
1668static int do_path_lookup(int dfd, const char *name,
1669 unsigned int flags, struct nameidata *nd)
1670{
1671 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1672 if (unlikely(retval == -ECHILD))
1673 retval = path_lookupat(dfd, name, flags, nd);
1674 if (unlikely(retval == -ESTALE))
1675 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1676
1740 if (likely(!retval)) { 1677 if (likely(!retval)) {
1741 if (unlikely(!audit_dummy_context())) { 1678 if (unlikely(!audit_dummy_context())) {
1742 if (nd->path.dentry && nd->inode) 1679 if (nd->path.dentry && nd->inode)
1743 audit_inode(name, nd->path.dentry); 1680 audit_inode(name, nd->path.dentry);
1744 } 1681 }
1745 } 1682 }
1746
1747 return retval; 1683 return retval;
1748} 1684}
1749 1685
1750int path_lookup(const char *name, unsigned int flags, 1686int kern_path_parent(const char *name, struct nameidata *nd)
1751 struct nameidata *nd)
1752{ 1687{
1753 return do_path_lookup(AT_FDCWD, name, flags, nd); 1688 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1754} 1689}
1755 1690
1756int kern_path(const char *name, unsigned int flags, struct path *path) 1691int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1774 const char *name, unsigned int flags, 1709 const char *name, unsigned int flags,
1775 struct nameidata *nd) 1710 struct nameidata *nd)
1776{ 1711{
1777 int retval; 1712 nd->root.dentry = dentry;
1778 1713 nd->root.mnt = mnt;
1779 /* same as do_path_lookup */ 1714 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1780 nd->last_type = LAST_ROOT; 1715 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1781 nd->flags = flags;
1782 nd->depth = 0;
1783
1784 nd->path.dentry = dentry;
1785 nd->path.mnt = mnt;
1786 path_get(&nd->path);
1787 nd->root = nd->path;
1788 path_get(&nd->root);
1789 nd->inode = nd->path.dentry->d_inode;
1790
1791 retval = path_walk(name, nd);
1792 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1793 nd->inode))
1794 audit_inode(name, nd->path.dentry);
1795
1796 path_put(&nd->root);
1797 nd->root.mnt = NULL;
1798
1799 return retval;
1800} 1716}
1801 1717
1802static struct dentry *__lookup_hash(struct qstr *name, 1718static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1811 return ERR_PTR(err); 1727 return ERR_PTR(err);
1812 1728
1813 /* 1729 /*
1814 * See if the low-level filesystem might want
1815 * to use its own hash..
1816 */
1817 if (base->d_flags & DCACHE_OP_HASH) {
1818 err = base->d_op->d_hash(base, inode, name);
1819 dentry = ERR_PTR(err);
1820 if (err < 0)
1821 goto out;
1822 }
1823
1824 /*
1825 * Don't bother with __d_lookup: callers are for creat as 1730 * Don't bother with __d_lookup: callers are for creat as
1826 * well as unlink, so a lot of the time it would cost 1731 * well as unlink, so a lot of the time it would cost
1827 * a double lookup. 1732 * a double lookup.
@@ -1833,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1833 1738
1834 if (!dentry) 1739 if (!dentry)
1835 dentry = d_alloc_and_lookup(base, name, nd); 1740 dentry = d_alloc_and_lookup(base, name, nd);
1836out: 1741
1837 return dentry; 1742 return dentry;
1838} 1743}
1839 1744
@@ -1847,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1752 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1848} 1753}
1849 1754
1850static int __lookup_one_len(const char *name, struct qstr *this,
1851 struct dentry *base, int len)
1852{
1853 unsigned long hash;
1854 unsigned int c;
1855
1856 this->name = name;
1857 this->len = len;
1858 if (!len)
1859 return -EACCES;
1860
1861 hash = init_name_hash();
1862 while (len--) {
1863 c = *(const unsigned char *)name++;
1864 if (c == '/' || c == '\0')
1865 return -EACCES;
1866 hash = partial_name_hash(c, hash);
1867 }
1868 this->hash = end_name_hash(hash);
1869 return 0;
1870}
1871
1872/** 1755/**
1873 * lookup_one_len - filesystem helper to lookup single pathname component 1756 * lookup_one_len - filesystem helper to lookup single pathname component
1874 * @name: pathname component to lookup 1757 * @name: pathname component to lookup
@@ -1882,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1882 */ 1765 */
1883struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1766struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1884{ 1767{
1885 int err;
1886 struct qstr this; 1768 struct qstr this;
1769 unsigned long hash;
1770 unsigned int c;
1887 1771
1888 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1772 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1889 1773
1890 err = __lookup_one_len(name, &this, base, len); 1774 this.name = name;
1891 if (err) 1775 this.len = len;
1892 return ERR_PTR(err); 1776 if (!len)
1777 return ERR_PTR(-EACCES);
1778
1779 hash = init_name_hash();
1780 while (len--) {
1781 c = *(const unsigned char *)name++;
1782 if (c == '/' || c == '\0')
1783 return ERR_PTR(-EACCES);
1784 hash = partial_name_hash(c, hash);
1785 }
1786 this.hash = end_name_hash(hash);
1787 /*
1788 * See if the low-level filesystem might want
1789 * to use its own hash..
1790 */
1791 if (base->d_flags & DCACHE_OP_HASH) {
1792 int err = base->d_op->d_hash(base, base->d_inode, &this);
1793 if (err < 0)
1794 return ERR_PTR(err);
1795 }
1893 1796
1894 return __lookup_hash(&this, base, NULL); 1797 return __lookup_hash(&this, base, NULL);
1895} 1798}
@@ -1898,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1898 struct path *path) 1801 struct path *path)
1899{ 1802{
1900 struct nameidata nd; 1803 struct nameidata nd;
1901 char *tmp = getname(name); 1804 char *tmp = getname_flags(name, flags);
1902 int err = PTR_ERR(tmp); 1805 int err = PTR_ERR(tmp);
1903 if (!IS_ERR(tmp)) { 1806 if (!IS_ERR(tmp)) {
1904 1807
@@ -2078,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2078 return error; 1981 return error;
2079} 1982}
2080 1983
2081int may_open(struct path *path, int acc_mode, int flag) 1984static int may_open(struct path *path, int acc_mode, int flag)
2082{ 1985{
2083 struct dentry *dentry = path->dentry; 1986 struct dentry *dentry = path->dentry;
2084 struct inode *inode = dentry->d_inode; 1987 struct inode *inode = dentry->d_inode;
2085 int error; 1988 int error;
2086 1989
1990 /* O_PATH? */
1991 if (!acc_mode)
1992 return 0;
1993
2087 if (!inode) 1994 if (!inode)
2088 return -ENOENT; 1995 return -ENOENT;
2089 1996
@@ -2152,34 +2059,6 @@ static int handle_truncate(struct file *filp)
2152} 2059}
2153 2060
2154/* 2061/*
2155 * Be careful about ever adding any more callers of this
2156 * function. Its flags must be in the namei format, not
2157 * what get passed to sys_open().
2158 */
2159static int __open_namei_create(struct nameidata *nd, struct path *path,
2160 int open_flag, int mode)
2161{
2162 int error;
2163 struct dentry *dir = nd->path.dentry;
2164
2165 if (!IS_POSIXACL(dir->d_inode))
2166 mode &= ~current_umask();
2167 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2168 if (error)
2169 goto out_unlock;
2170 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2171out_unlock:
2172 mutex_unlock(&dir->d_inode->i_mutex);
2173 dput(nd->path.dentry);
2174 nd->path.dentry = path->dentry;
2175
2176 if (error)
2177 return error;
2178 /* Don't check for write permission, don't truncate */
2179 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2180}
2181
2182/*
2183 * Note that while the flag value (low two bits) for sys_open means: 2062 * Note that while the flag value (low two bits) for sys_open means:
2184 * 00 - read-only 2063 * 00 - read-only
2185 * 01 - write-only 2064 * 01 - write-only
@@ -2203,126 +2082,115 @@ static inline int open_to_namei_flags(int flag)
2203 return flag; 2082 return flag;
2204} 2083}
2205 2084
2206static int open_will_truncate(int flag, struct inode *inode)
2207{
2208 /*
2209 * We'll never write to the fs underlying
2210 * a device file.
2211 */
2212 if (special_file(inode->i_mode))
2213 return 0;
2214 return (flag & O_TRUNC);
2215}
2216
2217static struct file *finish_open(struct nameidata *nd,
2218 int open_flag, int acc_mode)
2219{
2220 struct file *filp;
2221 int will_truncate;
2222 int error;
2223
2224 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2225 if (will_truncate) {
2226 error = mnt_want_write(nd->path.mnt);
2227 if (error)
2228 goto exit;
2229 }
2230 error = may_open(&nd->path, acc_mode, open_flag);
2231 if (error) {
2232 if (will_truncate)
2233 mnt_drop_write(nd->path.mnt);
2234 goto exit;
2235 }
2236 filp = nameidata_to_filp(nd);
2237 if (!IS_ERR(filp)) {
2238 error = ima_file_check(filp, acc_mode);
2239 if (error) {
2240 fput(filp);
2241 filp = ERR_PTR(error);
2242 }
2243 }
2244 if (!IS_ERR(filp)) {
2245 if (will_truncate) {
2246 error = handle_truncate(filp);
2247 if (error) {
2248 fput(filp);
2249 filp = ERR_PTR(error);
2250 }
2251 }
2252 }
2253 /*
2254 * It is now safe to drop the mnt write
2255 * because the filp has had a write taken
2256 * on its behalf.
2257 */
2258 if (will_truncate)
2259 mnt_drop_write(nd->path.mnt);
2260 path_put(&nd->path);
2261 return filp;
2262
2263exit:
2264 path_put(&nd->path);
2265 return ERR_PTR(error);
2266}
2267
2268/* 2085/*
2269 * Handle O_CREAT case for do_filp_open 2086 * Handle the last step of open()
2270 */ 2087 */
2271static struct file *do_last(struct nameidata *nd, struct path *path, 2088static struct file *do_last(struct nameidata *nd, struct path *path,
2272 int open_flag, int acc_mode, 2089 const struct open_flags *op, const char *pathname)
2273 int mode, const char *pathname)
2274{ 2090{
2275 struct dentry *dir = nd->path.dentry; 2091 struct dentry *dir = nd->path.dentry;
2092 struct dentry *dentry;
2093 int open_flag = op->open_flag;
2094 int will_truncate = open_flag & O_TRUNC;
2095 int want_write = 0;
2096 int acc_mode = op->acc_mode;
2276 struct file *filp; 2097 struct file *filp;
2277 int error = -EISDIR; 2098 int error;
2099
2100 nd->flags &= ~LOOKUP_PARENT;
2101 nd->flags |= op->intent;
2278 2102
2279 switch (nd->last_type) { 2103 switch (nd->last_type) {
2280 case LAST_DOTDOT: 2104 case LAST_DOTDOT:
2281 follow_dotdot(nd);
2282 dir = nd->path.dentry;
2283 case LAST_DOT: 2105 case LAST_DOT:
2284 if (need_reval_dot(dir)) { 2106 error = handle_dots(nd, nd->last_type);
2285 int status = d_revalidate(nd->path.dentry, nd); 2107 if (error)
2286 if (!status) 2108 return ERR_PTR(error);
2287 status = -ESTALE;
2288 if (status < 0) {
2289 error = status;
2290 goto exit;
2291 }
2292 }
2293 /* fallthrough */ 2109 /* fallthrough */
2294 case LAST_ROOT: 2110 case LAST_ROOT:
2295 goto exit; 2111 if (nd->flags & LOOKUP_RCU) {
2112 if (nameidata_drop_rcu_last(nd))
2113 return ERR_PTR(-ECHILD);
2114 }
2115 error = handle_reval_path(nd);
2116 if (error)
2117 goto exit;
2118 audit_inode(pathname, nd->path.dentry);
2119 if (open_flag & O_CREAT) {
2120 error = -EISDIR;
2121 goto exit;
2122 }
2123 goto ok;
2296 case LAST_BIND: 2124 case LAST_BIND:
2125 /* can't be RCU mode here */
2126 error = handle_reval_path(nd);
2127 if (error)
2128 goto exit;
2297 audit_inode(pathname, dir); 2129 audit_inode(pathname, dir);
2298 goto ok; 2130 goto ok;
2299 } 2131 }
2300 2132
2133 if (!(open_flag & O_CREAT)) {
2134 int symlink_ok = 0;
2135 if (nd->last.name[nd->last.len])
2136 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2137 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2138 symlink_ok = 1;
2139 /* we _can_ be in RCU mode here */
2140 error = walk_component(nd, path, &nd->last, LAST_NORM,
2141 !symlink_ok);
2142 if (error < 0)
2143 return ERR_PTR(error);
2144 if (error) /* symlink */
2145 return NULL;
2146 /* sayonara */
2147 if (nd->flags & LOOKUP_RCU) {
2148 if (nameidata_drop_rcu_last(nd))
2149 return ERR_PTR(-ECHILD);
2150 }
2151
2152 error = -ENOTDIR;
2153 if (nd->flags & LOOKUP_DIRECTORY) {
2154 if (!nd->inode->i_op->lookup)
2155 goto exit;
2156 }
2157 audit_inode(pathname, nd->path.dentry);
2158 goto ok;
2159 }
2160
2161 /* create side of things */
2162
2163 if (nd->flags & LOOKUP_RCU) {
2164 if (nameidata_drop_rcu_last(nd))
2165 return ERR_PTR(-ECHILD);
2166 }
2167
2168 audit_inode(pathname, dir);
2169 error = -EISDIR;
2301 /* trailing slashes? */ 2170 /* trailing slashes? */
2302 if (nd->last.name[nd->last.len]) 2171 if (nd->last.name[nd->last.len])
2303 goto exit; 2172 goto exit;
2304 2173
2305 mutex_lock(&dir->d_inode->i_mutex); 2174 mutex_lock(&dir->d_inode->i_mutex);
2306 2175
2307 path->dentry = lookup_hash(nd); 2176 dentry = lookup_hash(nd);
2308 path->mnt = nd->path.mnt; 2177 error = PTR_ERR(dentry);
2309 2178 if (IS_ERR(dentry)) {
2310 error = PTR_ERR(path->dentry);
2311 if (IS_ERR(path->dentry)) {
2312 mutex_unlock(&dir->d_inode->i_mutex); 2179 mutex_unlock(&dir->d_inode->i_mutex);
2313 goto exit; 2180 goto exit;
2314 } 2181 }
2315 2182
2316 if (IS_ERR(nd->intent.open.file)) { 2183 path->dentry = dentry;
2317 error = PTR_ERR(nd->intent.open.file); 2184 path->mnt = nd->path.mnt;
2318 goto exit_mutex_unlock;
2319 }
2320 2185
2321 /* Negative dentry, just create the file */ 2186 /* Negative dentry, just create the file */
2322 if (!path->dentry->d_inode) { 2187 if (!dentry->d_inode) {
2188 int mode = op->mode;
2189 if (!IS_POSIXACL(dir->d_inode))
2190 mode &= ~current_umask();
2323 /* 2191 /*
2324 * This write is needed to ensure that a 2192 * This write is needed to ensure that a
2325 * ro->rw transition does not occur between 2193 * rw->ro transition does not occur between
2326 * the time when the file is created and when 2194 * the time when the file is created and when
2327 * a permanent write count is taken through 2195 * a permanent write count is taken through
2328 * the 'struct file' in nameidata_to_filp(). 2196 * the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2330 error = mnt_want_write(nd->path.mnt); 2198 error = mnt_want_write(nd->path.mnt);
2331 if (error) 2199 if (error)
2332 goto exit_mutex_unlock; 2200 goto exit_mutex_unlock;
2333 error = __open_namei_create(nd, path, open_flag, mode); 2201 want_write = 1;
2334 if (error) { 2202 /* Don't check for write permission, don't truncate */
2335 mnt_drop_write(nd->path.mnt); 2203 open_flag &= ~O_TRUNC;
2336 goto exit; 2204 will_truncate = 0;
2337 } 2205 acc_mode = MAY_OPEN;
2338 filp = nameidata_to_filp(nd); 2206 error = security_path_mknod(&nd->path, dentry, mode, 0);
2339 mnt_drop_write(nd->path.mnt); 2207 if (error)
2340 path_put(&nd->path); 2208 goto exit_mutex_unlock;
2341 if (!IS_ERR(filp)) { 2209 error = vfs_create(dir->d_inode, dentry, mode, nd);
2342 error = ima_file_check(filp, acc_mode); 2210 if (error)
2343 if (error) { 2211 goto exit_mutex_unlock;
2344 fput(filp); 2212 mutex_unlock(&dir->d_inode->i_mutex);
2345 filp = ERR_PTR(error); 2213 dput(nd->path.dentry);
2346 } 2214 nd->path.dentry = dentry;
2347 } 2215 goto common;
2348 return filp;
2349 } 2216 }
2350 2217
2351 /* 2218 /*
@@ -2375,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2375 if (S_ISDIR(nd->inode->i_mode)) 2242 if (S_ISDIR(nd->inode->i_mode))
2376 goto exit; 2243 goto exit;
2377ok: 2244ok:
2378 filp = finish_open(nd, open_flag, acc_mode); 2245 if (!S_ISREG(nd->inode->i_mode))
2246 will_truncate = 0;
2247
2248 if (will_truncate) {
2249 error = mnt_want_write(nd->path.mnt);
2250 if (error)
2251 goto exit;
2252 want_write = 1;
2253 }
2254common:
2255 error = may_open(&nd->path, acc_mode, open_flag);
2256 if (error)
2257 goto exit;
2258 filp = nameidata_to_filp(nd);
2259 if (!IS_ERR(filp)) {
2260 error = ima_file_check(filp, op->acc_mode);
2261 if (error) {
2262 fput(filp);
2263 filp = ERR_PTR(error);
2264 }
2265 }
2266 if (!IS_ERR(filp)) {
2267 if (will_truncate) {
2268 error = handle_truncate(filp);
2269 if (error) {
2270 fput(filp);
2271 filp = ERR_PTR(error);
2272 }
2273 }
2274 }
2275out:
2276 if (want_write)
2277 mnt_drop_write(nd->path.mnt);
2278 path_put(&nd->path);
2379 return filp; 2279 return filp;
2380 2280
2381exit_mutex_unlock: 2281exit_mutex_unlock:
@@ -2383,204 +2283,103 @@ exit_mutex_unlock:
2383exit_dput: 2283exit_dput:
2384 path_put_conditional(path, nd); 2284 path_put_conditional(path, nd);
2385exit: 2285exit:
2386 path_put(&nd->path); 2286 filp = ERR_PTR(error);
2387 return ERR_PTR(error); 2287 goto out;
2388} 2288}
2389 2289
2390/* 2290static struct file *path_openat(int dfd, const char *pathname,
2391 * Note that the low bits of the passed in "open_flag" 2291 struct nameidata *nd, const struct open_flags *op, int flags)
2392 * are not the same as in the local variable "flag". See
2393 * open_to_namei_flags() for more details.
2394 */
2395struct file *do_filp_open(int dfd, const char *pathname,
2396 int open_flag, int mode, int acc_mode)
2397{ 2292{
2293 struct file *base = NULL;
2398 struct file *filp; 2294 struct file *filp;
2399 struct nameidata nd;
2400 int error;
2401 struct path path; 2295 struct path path;
2402 int count = 0; 2296 int error;
2403 int flag = open_to_namei_flags(open_flag);
2404 int flags;
2405
2406 if (!(open_flag & O_CREAT))
2407 mode = 0;
2408
2409 /* Must never be set by userspace */
2410 open_flag &= ~FMODE_NONOTIFY;
2411
2412 /*
2413 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2414 * check for O_DSYNC if the need any syncing at all we enforce it's
2415 * always set instead of having to deal with possibly weird behaviour
2416 * for malicious applications setting only __O_SYNC.
2417 */
2418 if (open_flag & __O_SYNC)
2419 open_flag |= O_DSYNC;
2420
2421 if (!acc_mode)
2422 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2423
2424 /* O_TRUNC implies we need access checks for write permissions */
2425 if (open_flag & O_TRUNC)
2426 acc_mode |= MAY_WRITE;
2427
2428 /* Allow the LSM permission hook to distinguish append
2429 access from general write access. */
2430 if (open_flag & O_APPEND)
2431 acc_mode |= MAY_APPEND;
2432
2433 flags = LOOKUP_OPEN;
2434 if (open_flag & O_CREAT) {
2435 flags |= LOOKUP_CREATE;
2436 if (open_flag & O_EXCL)
2437 flags |= LOOKUP_EXCL;
2438 }
2439 if (open_flag & O_DIRECTORY)
2440 flags |= LOOKUP_DIRECTORY;
2441 if (!(open_flag & O_NOFOLLOW))
2442 flags |= LOOKUP_FOLLOW;
2443 2297
2444 filp = get_empty_filp(); 2298 filp = get_empty_filp();
2445 if (!filp) 2299 if (!filp)
2446 return ERR_PTR(-ENFILE); 2300 return ERR_PTR(-ENFILE);
2447 2301
2448 filp->f_flags = open_flag; 2302 filp->f_flags = op->open_flag;
2449 nd.intent.open.file = filp; 2303 nd->intent.open.file = filp;
2450 nd.intent.open.flags = flag; 2304 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2451 nd.intent.open.create_mode = mode; 2305 nd->intent.open.create_mode = op->mode;
2452
2453 if (open_flag & O_CREAT)
2454 goto creat;
2455 2306
2456 /* !O_CREAT, simple open */ 2307 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2458 if (unlikely(error)) 2308 if (unlikely(error))
2459 goto out_filp2;
2460 error = -ELOOP;
2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2462 if (nd.inode->i_op->follow_link)
2463 goto out_path2;
2464 }
2465 error = -ENOTDIR;
2466 if (nd.flags & LOOKUP_DIRECTORY) {
2467 if (!nd.inode->i_op->lookup)
2468 goto out_path2;
2469 }
2470 audit_inode(pathname, nd.path.dentry);
2471 filp = finish_open(&nd, open_flag, acc_mode);
2472out2:
2473 release_open_intent(&nd);
2474 return filp;
2475
2476out_path2:
2477 path_put(&nd.path);
2478out_filp2:
2479 filp = ERR_PTR(error);
2480 goto out2;
2481
2482creat:
2483 /* OK, have to create the file. Find the parent. */
2484 error = path_init_rcu(dfd, pathname,
2485 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2486 if (error)
2487 goto out_filp; 2309 goto out_filp;
2488 error = path_walk_rcu(pathname, &nd);
2489 path_finish_rcu(&nd);
2490 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2491 /* slower, locked walk */
2492 if (error == -ESTALE) {
2493reval:
2494 flags |= LOOKUP_REVAL;
2495 }
2496 error = path_init(dfd, pathname,
2497 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2498 if (error)
2499 goto out_filp;
2500 2310
2501 error = path_walk_simple(pathname, &nd); 2311 current->total_link_count = 0;
2502 } 2312 error = link_path_walk(pathname, nd);
2503 if (unlikely(error)) 2313 if (unlikely(error))
2504 goto out_filp; 2314 goto out_filp;
2505 if (unlikely(!audit_dummy_context()))
2506 audit_inode(pathname, nd.path.dentry);
2507 2315
2508 /* 2316 filp = do_last(nd, &path, op, pathname);
2509 * We have the parent and last component.
2510 */
2511 nd.flags = flags;
2512 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2513 while (unlikely(!filp)) { /* trailing symlink */ 2317 while (unlikely(!filp)) { /* trailing symlink */
2514 struct path link = path; 2318 struct path link = path;
2515 struct inode *linki = link.dentry->d_inode;
2516 void *cookie; 2319 void *cookie;
2517 error = -ELOOP; 2320 if (!(nd->flags & LOOKUP_FOLLOW)) {
2518 if (!(nd.flags & LOOKUP_FOLLOW)) 2321 path_put_conditional(&path, nd);
2519 goto exit_dput; 2322 path_put(&nd->path);
2520 if (count++ == 32) 2323 filp = ERR_PTR(-ELOOP);
2521 goto exit_dput; 2324 break;
2522 /*
2523 * This is subtle. Instead of calling do_follow_link() we do
2524 * the thing by hands. The reason is that this way we have zero
2525 * link_count and path_walk() (called from ->follow_link)
2526 * honoring LOOKUP_PARENT. After that we have the parent and
2527 * last component, i.e. we are in the same situation as after
2528 * the first path_walk(). Well, almost - if the last component
2529 * is normal we get its copy stored in nd->last.name and we will
2530 * have to putname() it when we are done. Procfs-like symlinks
2531 * just set LAST_BIND.
2532 */
2533 nd.flags |= LOOKUP_PARENT;
2534 error = security_inode_follow_link(link.dentry, &nd);
2535 if (error)
2536 goto exit_dput;
2537 error = __do_follow_link(&link, &nd, &cookie);
2538 if (unlikely(error)) {
2539 if (!IS_ERR(cookie) && linki->i_op->put_link)
2540 linki->i_op->put_link(link.dentry, &nd, cookie);
2541 /* nd.path had been dropped */
2542 nd.path = link;
2543 goto out_path;
2544 } 2325 }
2545 nd.flags &= ~LOOKUP_PARENT; 2326 nd->flags |= LOOKUP_PARENT;
2546 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2327 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2547 if (linki->i_op->put_link) 2328 error = follow_link(&link, nd, &cookie);
2548 linki->i_op->put_link(link.dentry, &nd, cookie); 2329 if (unlikely(error))
2549 path_put(&link); 2330 filp = ERR_PTR(error);
2331 else
2332 filp = do_last(nd, &path, op, pathname);
2333 put_link(nd, &link, cookie);
2550 } 2334 }
2551out: 2335out:
2552 if (nd.root.mnt) 2336 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2553 path_put(&nd.root); 2337 path_put(&nd->root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2338 if (base)
2555 goto reval; 2339 fput(base);
2556 release_open_intent(&nd); 2340 release_open_intent(nd);
2557 return filp; 2341 return filp;
2558 2342
2559exit_dput:
2560 path_put_conditional(&path, &nd);
2561out_path:
2562 path_put(&nd.path);
2563out_filp: 2343out_filp:
2564 filp = ERR_PTR(error); 2344 filp = ERR_PTR(error);
2565 goto out; 2345 goto out;
2566} 2346}
2567 2347
2568/** 2348struct file *do_filp_open(int dfd, const char *pathname,
2569 * filp_open - open file and return file pointer 2349 const struct open_flags *op, int flags)
2570 *
2571 * @filename: path to open
2572 * @flags: open flags as per the open(2) second argument
2573 * @mode: mode for the new file if O_CREAT is set, else ignored
2574 *
2575 * This is the helper to open a file from kernelspace if you really
2576 * have to. But in generally you should not do this, so please move
2577 * along, nothing to see here..
2578 */
2579struct file *filp_open(const char *filename, int flags, int mode)
2580{ 2350{
2581 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2351 struct nameidata nd;
2352 struct file *filp;
2353
2354 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2355 if (unlikely(filp == ERR_PTR(-ECHILD)))
2356 filp = path_openat(dfd, pathname, &nd, op, flags);
2357 if (unlikely(filp == ERR_PTR(-ESTALE)))
2358 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2359 return filp;
2360}
2361
2362struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2363 const char *name, const struct open_flags *op, int flags)
2364{
2365 struct nameidata nd;
2366 struct file *file;
2367
2368 nd.root.mnt = mnt;
2369 nd.root.dentry = dentry;
2370
2371 flags |= LOOKUP_ROOT;
2372
2373 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2374 return ERR_PTR(-ELOOP);
2375
2376 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2377 if (unlikely(file == ERR_PTR(-ECHILD)))
2378 file = path_openat(-1, name, &nd, op, flags);
2379 if (unlikely(file == ERR_PTR(-ESTALE)))
2380 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2381 return file;
2582} 2382}
2583EXPORT_SYMBOL(filp_open);
2584 2383
2585/** 2384/**
2586 * lookup_create - lookup a dentry, creating it if it doesn't exist 2385 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3119,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3119 return error; 2918 return error;
3120 2919
3121 mutex_lock(&inode->i_mutex); 2920 mutex_lock(&inode->i_mutex);
3122 error = dir->i_op->link(old_dentry, dir, new_dentry); 2921 /* Make sure we don't allow creating hardlink to an unlinked file */
2922 if (inode->i_nlink == 0)
2923 error = -ENOENT;
2924 else
2925 error = dir->i_op->link(old_dentry, dir, new_dentry);
3123 mutex_unlock(&inode->i_mutex); 2926 mutex_unlock(&inode->i_mutex);
3124 if (!error) 2927 if (!error)
3125 fsnotify_link(dir, inode, new_dentry); 2928 fsnotify_link(dir, inode, new_dentry);
@@ -3141,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3141 struct dentry *new_dentry; 2944 struct dentry *new_dentry;
3142 struct nameidata nd; 2945 struct nameidata nd;
3143 struct path old_path; 2946 struct path old_path;
2947 int how = 0;
3144 int error; 2948 int error;
3145 char *to; 2949 char *to;
3146 2950
3147 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2951 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3148 return -EINVAL; 2952 return -EINVAL;
2953 /*
2954 * To use null names we require CAP_DAC_READ_SEARCH
2955 * This ensures that not everyone will be able to create
2956 * handlink using the passed filedescriptor.
2957 */
2958 if (flags & AT_EMPTY_PATH) {
2959 if (!capable(CAP_DAC_READ_SEARCH))
2960 return -ENOENT;
2961 how = LOOKUP_EMPTY;
2962 }
2963
2964 if (flags & AT_SYMLINK_FOLLOW)
2965 how |= LOOKUP_FOLLOW;
3149 2966
3150 error = user_path_at(olddfd, oldname, 2967 error = user_path_at(olddfd, oldname, how, &old_path);
3151 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3152 &old_path);
3153 if (error) 2968 if (error)
3154 return error; 2969 return error;
3155 2970
@@ -3586,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
3586EXPORT_SYMBOL(__page_symlink); 3401EXPORT_SYMBOL(__page_symlink);
3587EXPORT_SYMBOL(page_symlink); 3402EXPORT_SYMBOL(page_symlink);
3588EXPORT_SYMBOL(page_symlink_inode_operations); 3403EXPORT_SYMBOL(page_symlink_inode_operations);
3589EXPORT_SYMBOL(path_lookup); 3404EXPORT_SYMBOL(kern_path_parent);
3590EXPORT_SYMBOL(kern_path); 3405EXPORT_SYMBOL(kern_path);
3591EXPORT_SYMBOL(vfs_path_lookup); 3406EXPORT_SYMBOL(vfs_path_lookup);
3592EXPORT_SYMBOL(inode_permission); 3407EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index a66feed7311..e96e03782de 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f8e61816d7..01768e5e2c9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1518,7 +1518,7 @@ static int nfsiod_start(void)
1518{ 1518{
1519 struct workqueue_struct *wq; 1519 struct workqueue_struct *wq;
1520 dprintk("RPC: creating workqueue nfsiod\n"); 1520 dprintk("RPC: creating workqueue nfsiod\n");
1521 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1521 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1522 if (wq == NULL) 1522 if (wq == NULL)
1523 return -ENOMEM; 1523 return -ENOMEM;
1524 nfsiod_workqueue = wq; 1524 nfsiod_workqueue = wq;
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242dd..124e8fcb0dd 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4f..254652a9b54 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95..d5ab56cbe5c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
114extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
115extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
116 116
117int ocfs2_quota_setup(void);
118void ocfs2_quota_shutdown(void);
119
120#endif /* _OCFS2_QUOTA_H */ 117#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24..a73f6416648 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
63 * write to gf 63 * write to gf
64 */ 64 */
65 65
66static struct workqueue_struct *ocfs2_quota_wq = NULL;
67
68static void qsync_work_fn(struct work_struct *work); 66static void qsync_work_fn(struct work_struct *work);
69 67
70static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) 68static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
400 OCFS2_QBLK_RESERVED_SPACE; 398 OCFS2_QBLK_RESERVED_SPACE;
401 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 399 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
402 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 400 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
403 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 401 schedule_delayed_work(&oinfo->dqi_sync_work,
404 msecs_to_jiffies(oinfo->dqi_syncms)); 402 msecs_to_jiffies(oinfo->dqi_syncms));
405 403
406out_err: 404out_err:
407 mlog_exit(status); 405 mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
635 struct super_block *sb = oinfo->dqi_gqinode->i_sb; 633 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
636 634
637 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 635 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
638 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 636 schedule_delayed_work(&oinfo->dqi_sync_work,
639 msecs_to_jiffies(oinfo->dqi_syncms)); 637 msecs_to_jiffies(oinfo->dqi_syncms));
640} 638}
641 639
642/* 640/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
923 .alloc_dquot = ocfs2_alloc_dquot, 921 .alloc_dquot = ocfs2_alloc_dquot,
924 .destroy_dquot = ocfs2_destroy_dquot, 922 .destroy_dquot = ocfs2_destroy_dquot,
925}; 923};
926
927int ocfs2_quota_setup(void)
928{
929 ocfs2_quota_wq = create_workqueue("o2quot");
930 if (!ocfs2_quota_wq)
931 return -ENOMEM;
932 return 0;
933}
934
935void ocfs2_quota_shutdown(void)
936{
937 if (ocfs2_quota_wq) {
938 flush_workqueue(ocfs2_quota_wq);
939 destroy_workqueue(ocfs2_quota_wq);
940 ocfs2_quota_wq = NULL;
941 }
942}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 35798b88042..c384d634872 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4380,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4380 if (IS_ERR(s)) 4380 if (IS_ERR(s))
4381 return PTR_ERR(s); 4381 return PTR_ERR(s);
4382 4382
4383 error = path_lookup(s, LOOKUP_PARENT, nd); 4383 error = kern_path_parent(s, nd);
4384 if (error) 4384 if (error)
4385 putname(s); 4385 putname(s);
4386 else 4386 else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 36c423fb063..236ed1bdca2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1657,16 +1657,11 @@ static int __init ocfs2_init(void)
1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1658 } 1658 }
1659 1659
1660 status = ocfs2_quota_setup();
1661 if (status)
1662 goto leave;
1663
1664 ocfs2_set_locking_protocol(); 1660 ocfs2_set_locking_protocol();
1665 1661
1666 status = register_quota_format(&ocfs2_quota_format); 1662 status = register_quota_format(&ocfs2_quota_format);
1667leave: 1663leave:
1668 if (status < 0) { 1664 if (status < 0) {
1669 ocfs2_quota_shutdown();
1670 ocfs2_free_mem_caches(); 1665 ocfs2_free_mem_caches();
1671 exit_ocfs2_uptodate_cache(); 1666 exit_ocfs2_uptodate_cache();
1672 } 1667 }
@@ -1683,8 +1678,6 @@ static void __exit ocfs2_exit(void)
1683{ 1678{
1684 mlog_entry_void(); 1679 mlog_entry_void();
1685 1680
1686 ocfs2_quota_shutdown();
1687
1688 if (ocfs2_wq) { 1681 if (ocfs2_wq) {
1689 flush_workqueue(ocfs2_wq); 1682 flush_workqueue(ocfs2_wq);
1690 destroy_workqueue(ocfs2_wq); 1683 destroy_workqueue(ocfs2_wq);
diff --git a/fs/open.c b/fs/open.c
index 80d430ae3b2..f83ca80cc59 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -573,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
573{ 573{
574 struct path path; 574 struct path path;
575 int error = -EINVAL; 575 int error = -EINVAL;
576 int follow; 576 int lookup_flags;
577 577
578 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
579 goto out; 579 goto out;
580 580
581 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
582 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
583 if (error) 585 if (error)
584 goto out; 586 goto out;
585 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -669,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
669 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
670 const struct cred *cred) 672 const struct cred *cred)
671{ 673{
674 static const struct file_operations empty_fops = {};
672 struct inode *inode; 675 struct inode *inode;
673 int error; 676 int error;
674 677
675 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
676 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
677 inode = dentry->d_inode; 684 inode = dentry->d_inode;
678 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
679 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -687,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
687 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
688 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
689 f->f_pos = 0; 696 f->f_pos = 0;
690 f->f_op = fops_get(inode->i_fop);
691 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
692 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
693 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
694 if (error) 707 if (error)
695 goto cleanup_all; 708 goto cleanup_all;
@@ -891,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file)
891 904
892EXPORT_SYMBOL(fd_install); 905EXPORT_SYMBOL(fd_install);
893 906
907static inline int build_open_flags(int flags, int mode, struct open_flags *op)
908{
909 int lookup_flags = 0;
910 int acc_mode;
911
912 if (!(flags & O_CREAT))
913 mode = 0;
914 op->mode = mode;
915
916 /* Must never be set by userspace */
917 flags &= ~FMODE_NONOTIFY;
918
919 /*
920 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
921 * check for O_DSYNC if the need any syncing at all we enforce it's
922 * always set instead of having to deal with possibly weird behaviour
923 * for malicious applications setting only __O_SYNC.
924 */
925 if (flags & __O_SYNC)
926 flags |= O_DSYNC;
927
928 /*
929 * If we have O_PATH in the open flag. Then we
930 * cannot have anything other than the below set of flags
931 */
932 if (flags & O_PATH) {
933 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
934 acc_mode = 0;
935 } else {
936 acc_mode = MAY_OPEN | ACC_MODE(flags);
937 }
938
939 op->open_flag = flags;
940
941 /* O_TRUNC implies we need access checks for write permissions */
942 if (flags & O_TRUNC)
943 acc_mode |= MAY_WRITE;
944
945 /* Allow the LSM permission hook to distinguish append
946 access from general write access. */
947 if (flags & O_APPEND)
948 acc_mode |= MAY_APPEND;
949
950 op->acc_mode = acc_mode;
951
952 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
953
954 if (flags & O_CREAT) {
955 op->intent |= LOOKUP_CREATE;
956 if (flags & O_EXCL)
957 op->intent |= LOOKUP_EXCL;
958 }
959
960 if (flags & O_DIRECTORY)
961 lookup_flags |= LOOKUP_DIRECTORY;
962 if (!(flags & O_NOFOLLOW))
963 lookup_flags |= LOOKUP_FOLLOW;
964 return lookup_flags;
965}
966
967/**
968 * filp_open - open file and return file pointer
969 *
970 * @filename: path to open
971 * @flags: open flags as per the open(2) second argument
972 * @mode: mode for the new file if O_CREAT is set, else ignored
973 *
974 * This is the helper to open a file from kernelspace if you really
975 * have to. But in generally you should not do this, so please move
976 * along, nothing to see here..
977 */
978struct file *filp_open(const char *filename, int flags, int mode)
979{
980 struct open_flags op;
981 int lookup = build_open_flags(flags, mode, &op);
982 return do_filp_open(AT_FDCWD, filename, &op, lookup);
983}
984EXPORT_SYMBOL(filp_open);
985
986struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
987 const char *filename, int flags)
988{
989 struct open_flags op;
990 int lookup = build_open_flags(flags, 0, &op);
991 if (flags & O_CREAT)
992 return ERR_PTR(-EINVAL);
993 if (!filename && (flags & O_DIRECTORY))
994 if (!dentry->d_inode->i_op->lookup)
995 return ERR_PTR(-ENOTDIR);
996 return do_file_open_root(dentry, mnt, filename, &op, lookup);
997}
998EXPORT_SYMBOL(file_open_root);
999
894long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1000long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
895{ 1001{
1002 struct open_flags op;
1003 int lookup = build_open_flags(flags, mode, &op);
896 char *tmp = getname(filename); 1004 char *tmp = getname(filename);
897 int fd = PTR_ERR(tmp); 1005 int fd = PTR_ERR(tmp);
898 1006
899 if (!IS_ERR(tmp)) { 1007 if (!IS_ERR(tmp)) {
900 fd = get_unused_fd_flags(flags); 1008 fd = get_unused_fd_flags(flags);
901 if (fd >= 0) { 1009 if (fd >= 0) {
902 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1010 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
903 if (IS_ERR(f)) { 1011 if (IS_ERR(f)) {
904 put_unused_fd(fd); 1012 put_unused_fd(fd);
905 fd = PTR_ERR(f); 1013 fd = PTR_ERR(f);
@@ -969,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id)
969 if (filp->f_op && filp->f_op->flush) 1077 if (filp->f_op && filp->f_op->flush)
970 retval = filp->f_op->flush(filp, id); 1078 retval = filp->f_op->flush(filp, id);
971 1079
972 dnotify_flush(filp, id); 1080 if (likely(!(filp->f_mode & FMODE_PATH))) {
973 locks_remove_posix(filp, id); 1081 dnotify_flush(filp, id);
1082 locks_remove_posix(filp, id);
1083 }
974 fput(filp); 1084 fput(filp);
975 return retval; 1085 return retval;
976} 1086}
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index be03a0b08b4..764b86a0196 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,7 +10,7 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 8 13#define MAX_OSF_PARTITIONS 18
14 14
15int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
16{ 16{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e..1bba24bad82 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e699..c77514bd577 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2876 reiserfs_mounted_fs_count++; 2876 reiserfs_mounted_fs_count++;
2877 if (reiserfs_mounted_fs_count <= 1) { 2877 if (reiserfs_mounted_fs_count <= 1) {
2878 reiserfs_write_unlock(sb); 2878 reiserfs_write_unlock(sb);
2879 commit_wq = create_workqueue("reiserfs"); 2879 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2880 reiserfs_write_lock(sb); 2880 reiserfs_write_lock(sb);
2881 } 2881 }
2882 2882
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 82f45542dcf..118662690cd 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1123,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1123 reiserfs_write_unlock(dir->i_sb); 1123 reiserfs_write_unlock(dir->i_sb);
1124 return -EMLINK; 1124 return -EMLINK;
1125 } 1125 }
1126 if (inode->i_nlink == 0) {
1127 reiserfs_write_unlock(dir->i_sb);
1128 return -ENOENT;
1129 }
1130 1126
1131 /* inc before scheduling so reiserfs_unlink knows we are here */ 1127 /* inc before scheduling so reiserfs_unlink knows we are here */
1132 inc_nlink(inode); 1128 inc_nlink(inode);
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b70..961039121cb 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996..8244924dec5 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7..7217d67a80a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b7c338d5e9d..f1dce848ef9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1286 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1287 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1288 1288
1289 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1290 return 255; 1294 return 255;
1295 }
1291 1296
1292 *lenp = 3; 1297 *lenp = 3;
1293 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378d..f83a4c830a6 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
2022 if (!xfslogd_workqueue) 2022 if (!xfslogd_workqueue)
2023 goto out_free_buf_zone; 2023 goto out_free_buf_zone;
2024 2024
2025 xfsdatad_workqueue = create_workqueue("xfsdatad"); 2025 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
2026 if (!xfsdatad_workqueue) 2026 if (!xfsdatad_workqueue)
2027 goto out_destroy_xfslogd_workqueue; 2027 goto out_destroy_xfslogd_workqueue;
2028 2028
2029 xfsconvertd_workqueue = create_workqueue("xfsconvertd"); 2029 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
2030 WQ_MEM_RECLAIM, 1);
2030 if (!xfsconvertd_workqueue) 2031 if (!xfsconvertd_workqueue)
2031 goto out_destroy_xfsdatad_workqueue; 2032 goto out_destroy_xfsdatad_workqueue;
2032 2033
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fd..f4f878fc008 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb..4aff5639573 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 goto out; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
313 if (!xfs_mru_reap_wq) 313 if (!xfs_mru_reap_wq)
314 goto out_destroy_mru_elem_zone; 314 goto out_destroy_mru_elem_zone;
315 315